xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
264a2a386eSRichard Tran Mills 
279371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
284a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
294a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
304a2a386eSRichard Tran Mills   Mat B = *newmat;
31ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
324a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
33c1d5218aSRichard Tran Mills #endif
344a2a386eSRichard Tran Mills 
354a2a386eSRichard Tran Mills   PetscFunctionBegin;
369566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
374a2a386eSRichard Tran Mills 
384a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
3954871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
404a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
414a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4254871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
43ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4454871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
45ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
46190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
47431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
48e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
49190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
514f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
524a2a386eSRichard Tran Mills 
539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
544222ddf1SHong Zhang 
55ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
564abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
58e9c94282SRichard Tran Mills    * the spptr pointer. */
59a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
60a8327b06SKarl Rupp 
61792fecdfSBarry Smith   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
62ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
639566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
644a2a386eSRichard Tran Mills 
654a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
669566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
674a2a386eSRichard Tran Mills 
684a2a386eSRichard Tran Mills   *newmat = B;
694a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
704a2a386eSRichard Tran Mills }
714a2a386eSRichard Tran Mills 
729371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) {
734a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
744a2a386eSRichard Tran Mills 
754a2a386eSRichard Tran Mills   PetscFunctionBegin;
76e9c94282SRichard Tran Mills 
77edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
78e9c94282SRichard Tran Mills   if (aijmkl) {
794a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
80ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
81792fecdfSBarry Smith     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
824abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
839566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
84e9c94282SRichard Tran Mills   }
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
874a2a386eSRichard Tran Mills    * to destroy everything that remains. */
889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
894a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
904a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
914a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
922e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
939566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
944a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
954a2a386eSRichard Tran Mills }
964a2a386eSRichard Tran Mills 
97190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
985b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
995b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1005b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1015b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1025b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1035b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1049371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) {
105ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1066e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1076e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1086e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
10945fbe478SRichard Tran Mills   PetscFunctionBegin;
1106e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1116e369cd5SRichard Tran Mills #else
112a8327b06SKarl Rupp   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
113a8327b06SKarl Rupp   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
114a8327b06SKarl Rupp   PetscInt       m, n;
115a8327b06SKarl Rupp   MatScalar     *aa;
116a8327b06SKarl Rupp   PetscInt      *aj, *ai;
1174a2a386eSRichard Tran Mills 
118a8327b06SKarl Rupp   PetscFunctionBegin;
119e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
120e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
121e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
122e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
123e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1246e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1254d51fa23SRichard Tran Mills #endif
1266e369cd5SRichard Tran Mills 
1270632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1280632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1290632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
130792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
1310632b357SRichard Tran Mills   }
1328d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1336e369cd5SRichard Tran Mills 
134c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
135df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
136df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
137df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
13858678438SRichard Tran Mills   m                  = A->rmap->n;
13958678438SRichard Tran Mills   n                  = A->cmap->n;
140df555b71SRichard Tran Mills   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
141df555b71SRichard Tran Mills   aa                 = a->a; /* Nonzero elements stored row-by-row. */
142df555b71SRichard Tran Mills   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
1431495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1448d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1458d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
146792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
147792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
148792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
149*48a46eb9SPierre Jolivet     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
1504abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
1519566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
152190ae7a4SRichard Tran Mills   } else {
153190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
154c9d46305SRichard Tran Mills   }
1556e369cd5SRichard Tran Mills 
1566e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
157d995685eSRichard Tran Mills #endif
1586e369cd5SRichard Tran Mills }
1596e369cd5SRichard Tran Mills 
160b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
161190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
1629371c9d4SSatish Balay static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) {
16319afcda9SRichard Tran Mills   sparse_index_base_t indexing;
164190ae7a4SRichard Tran Mills   PetscInt            m, n;
16545fbe478SRichard Tran Mills   PetscInt           *aj, *ai, *dummy;
16619afcda9SRichard Tran Mills   MatScalar          *aa;
16719afcda9SRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl;
16819afcda9SRichard Tran Mills 
169362febeeSStefano Zampini   PetscFunctionBegin;
170190ae7a4SRichard Tran Mills   if (csrA) {
17145fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
172792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
1735f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
174190ae7a4SRichard Tran Mills   } else {
175190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
176190ae7a4SRichard Tran Mills     aa      = PETSC_NULL;
177aab60f1bSRichard Tran Mills   }
178190ae7a4SRichard Tran Mills 
1799566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
1809566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
181aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
182aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
183aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
184aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
185190ae7a4SRichard Tran Mills   if (csrA) {
1869566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
187190ae7a4SRichard Tran Mills   } else {
188190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1899566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1909566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1919566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
192190ae7a4SRichard Tran Mills   }
19319afcda9SRichard Tran Mills 
19419afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
19519afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
1969566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1976c87cf42SRichard Tran Mills 
19819afcda9SRichard Tran Mills   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
19919afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2006c87cf42SRichard Tran Mills 
20119afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20219afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20319afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
204f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
205f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
206f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
207190ae7a4SRichard Tran Mills   if (csrA) {
208792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
209792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
2101950a7e7SRichard Tran Mills   }
2119566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
21219afcda9SRichard Tran Mills   PetscFunctionReturn(0);
21319afcda9SRichard Tran Mills }
214b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
215190ae7a4SRichard Tran Mills 
216e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
217e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
218e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
219b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
2209371c9d4SSatish Balay static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) {
221e8be1fc7SRichard Tran Mills   PetscInt            i;
222e8be1fc7SRichard Tran Mills   PetscInt            nrows, ncols;
223e8be1fc7SRichard Tran Mills   PetscInt            nz;
224e8be1fc7SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
225e8be1fc7SRichard Tran Mills   PetscScalar        *aa;
2261495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
227e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
228e8be1fc7SRichard Tran Mills 
229362febeeSStefano Zampini   PetscFunctionBegin;
230190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
231190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
232190ae7a4SRichard Tran Mills 
233e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
234792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
235e8be1fc7SRichard Tran Mills 
236e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
237e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
238e8be1fc7SRichard Tran Mills   for (i = 0; i < nrows; i++) {
239e8be1fc7SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2409566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
241e8be1fc7SRichard Tran Mills   }
242e8be1fc7SRichard Tran Mills 
2439566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2449566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
245e8be1fc7SRichard Tran Mills 
2469566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
247a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
248a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
249a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
250a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
251e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
252e8be1fc7SRichard Tran Mills }
253b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
254e8be1fc7SRichard Tran Mills 
2553849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2569371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) {
2573849ddb2SRichard Tran Mills   PetscInt            i, j, k;
2583849ddb2SRichard Tran Mills   PetscInt            nrows, ncols;
2593849ddb2SRichard Tran Mills   PetscInt            nz;
2603849ddb2SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
2613849ddb2SRichard Tran Mills   PetscScalar        *aa;
2621495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2633849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2643849ddb2SRichard Tran Mills 
265362febeeSStefano Zampini   PetscFunctionBegin;
2669566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2673849ddb2SRichard Tran Mills 
2683849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2693849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2709566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
2713849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
2723849ddb2SRichard Tran Mills   }
2733849ddb2SRichard Tran Mills 
2743849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
2763849ddb2SRichard Tran Mills 
2773849ddb2SRichard Tran Mills   k = 0;
2783849ddb2SRichard Tran Mills   for (i = 0; i < nrows; i++) {
2799566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
2803849ddb2SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2813849ddb2SRichard Tran Mills     for (j = 0; j < nz; j++) {
2823849ddb2SRichard Tran Mills       if (aa) {
2839566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
2843849ddb2SRichard Tran Mills       } else {
2859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
2863849ddb2SRichard Tran Mills       }
2873849ddb2SRichard Tran Mills       k++;
2883849ddb2SRichard Tran Mills     }
2899566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
2903849ddb2SRichard Tran Mills   }
2913849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
2923849ddb2SRichard Tran Mills }
2933849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
2943849ddb2SRichard Tran Mills 
2959371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) {
2961495fedeSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2976e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
2986e369cd5SRichard Tran Mills 
2996e369cd5SRichard Tran Mills   PetscFunctionBegin;
3009566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
3016e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
3029566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
3036e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3041baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3056e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3066e369cd5SRichard Tran Mills }
3076e369cd5SRichard Tran Mills 
3089371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) {
3096e369cd5SRichard Tran Mills   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
3105b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
3116e369cd5SRichard Tran Mills 
3126e369cd5SRichard Tran Mills   PetscFunctionBegin;
3136e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3146e369cd5SRichard Tran Mills 
3156e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3166e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3176e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3186e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
319d96e85feSRichard Tran Mills    * a lot of code duplication. */
3206e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
3219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3226e369cd5SRichard Tran Mills 
3235b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3245b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3255b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3261baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
327df555b71SRichard Tran Mills 
3284a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3294a2a386eSRichard Tran Mills }
3304a2a386eSRichard Tran Mills 
331e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3329371c9d4SSatish Balay PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
3334a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3344a2a386eSRichard Tran Mills   const PetscScalar *x;
3354a2a386eSRichard Tran Mills   PetscScalar       *y;
3364a2a386eSRichard Tran Mills   const MatScalar   *aa;
3374a2a386eSRichard Tran Mills   PetscInt           m     = A->rmap->n;
338db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
339db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
340db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
3414a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
342db63039fSRichard Tran Mills   char               matdescra[6];
343db63039fSRichard Tran Mills 
3444a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
345ff03dc53SRichard Tran Mills   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
346ff03dc53SRichard Tran Mills 
347ff03dc53SRichard Tran Mills   PetscFunctionBegin;
348db63039fSRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
349db63039fSRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
3509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
352ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
353ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
354ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
355ff03dc53SRichard Tran Mills 
356ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
357db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
358ff03dc53SRichard Tran Mills 
3599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
362ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
363ff03dc53SRichard Tran Mills }
3641950a7e7SRichard Tran Mills #endif
365ff03dc53SRichard Tran Mills 
366ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
3679371c9d4SSatish Balay PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
368df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
369df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
370df555b71SRichard Tran Mills   const PetscScalar *x;
371df555b71SRichard Tran Mills   PetscScalar       *y;
372551aa5c8SRichard Tran Mills   PetscObjectState   state;
373df555b71SRichard Tran Mills 
374df555b71SRichard Tran Mills   PetscFunctionBegin;
375df555b71SRichard Tran Mills 
37638987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
37738987b35SRichard Tran Mills   if (!a->nz) {
3789566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
3799566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->rmap->n));
3809566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
38138987b35SRichard Tran Mills     PetscFunctionReturn(0);
38238987b35SRichard Tran Mills   }
383f36dfe3fSRichard Tran Mills 
3849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
386df555b71SRichard Tran Mills 
3873fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3883fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3893fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3909566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
3919566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3923fa15762SRichard Tran Mills 
393df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
394792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
395df555b71SRichard Tran Mills 
3969566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
399df555b71SRichard Tran Mills   PetscFunctionReturn(0);
400df555b71SRichard Tran Mills }
401d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402df555b71SRichard Tran Mills 
403e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
4049371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
405ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
406ff03dc53SRichard Tran Mills   const PetscScalar *x;
407ff03dc53SRichard Tran Mills   PetscScalar       *y;
408ff03dc53SRichard Tran Mills   const MatScalar   *aa;
409ff03dc53SRichard Tran Mills   PetscInt           m     = A->rmap->n;
410db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
411db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
412db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
413ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
414db63039fSRichard Tran Mills   char               matdescra[6];
415ff03dc53SRichard Tran Mills 
416ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
417ff03dc53SRichard Tran Mills   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
4184a2a386eSRichard Tran Mills 
4194a2a386eSRichard Tran Mills   PetscFunctionBegin;
420969800c5SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
421969800c5SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
4229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4244a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
4254a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
4264a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
4274a2a386eSRichard Tran Mills 
4284a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
429db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
4304a2a386eSRichard Tran Mills 
4319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4344a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4354a2a386eSRichard Tran Mills }
4361950a7e7SRichard Tran Mills #endif
4374a2a386eSRichard Tran Mills 
438ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
4399371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
440df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
441df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
442df555b71SRichard Tran Mills   const PetscScalar *x;
443df555b71SRichard Tran Mills   PetscScalar       *y;
444551aa5c8SRichard Tran Mills   PetscObjectState   state;
445df555b71SRichard Tran Mills 
446df555b71SRichard Tran Mills   PetscFunctionBegin;
447df555b71SRichard Tran Mills 
44838987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
44938987b35SRichard Tran Mills   if (!a->nz) {
4509566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
4519566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->cmap->n));
4529566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
45338987b35SRichard Tran Mills     PetscFunctionReturn(0);
45438987b35SRichard Tran Mills   }
455f36dfe3fSRichard Tran Mills 
4569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
458df555b71SRichard Tran Mills 
4593fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4603fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4613fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4629566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4639566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4643fa15762SRichard Tran Mills 
465df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
466792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
467df555b71SRichard Tran Mills 
4689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
471df555b71SRichard Tran Mills   PetscFunctionReturn(0);
472df555b71SRichard Tran Mills }
473d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
474df555b71SRichard Tran Mills 
475e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
4769371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
4774a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
4784a2a386eSRichard Tran Mills   const PetscScalar *x;
4794a2a386eSRichard Tran Mills   PetscScalar       *y, *z;
4804a2a386eSRichard Tran Mills   const MatScalar   *aa;
4814a2a386eSRichard Tran Mills   PetscInt           m = A->rmap->n;
482db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
4834a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
4844a2a386eSRichard Tran Mills   PetscInt           i;
4854a2a386eSRichard Tran Mills 
486ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
487ff03dc53SRichard Tran Mills   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
488a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
489db63039fSRichard Tran Mills   PetscScalar beta;
490a84739b8SRichard Tran Mills   char        matdescra[6];
491ff03dc53SRichard Tran Mills 
492ff03dc53SRichard Tran Mills   PetscFunctionBegin;
493a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
494a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
495a84739b8SRichard Tran Mills 
4969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
498ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
499ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
500ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
501ff03dc53SRichard Tran Mills 
502ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
503a84739b8SRichard Tran Mills   if (zz == yy) {
504a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
505db63039fSRichard Tran Mills     beta = 1.0;
506db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
507a84739b8SRichard Tran Mills   } else {
508db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
509db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
510db63039fSRichard Tran Mills     beta = 0.0;
511db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
5129371c9d4SSatish Balay     for (i = 0; i < m; i++) { z[i] += y[i]; }
513a84739b8SRichard Tran Mills   }
514ff03dc53SRichard Tran Mills 
5159566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
518ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
519ff03dc53SRichard Tran Mills }
5201950a7e7SRichard Tran Mills #endif
521ff03dc53SRichard Tran Mills 
522ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
5239371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
524df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
525df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
526df555b71SRichard Tran Mills   const PetscScalar *x;
527df555b71SRichard Tran Mills   PetscScalar       *y, *z;
528df555b71SRichard Tran Mills   PetscInt           m = A->rmap->n;
529df555b71SRichard Tran Mills   PetscInt           i;
530df555b71SRichard Tran Mills 
531df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
532551aa5c8SRichard Tran Mills   PetscObjectState state;
533df555b71SRichard Tran Mills 
534df555b71SRichard Tran Mills   PetscFunctionBegin;
535df555b71SRichard Tran Mills 
53638987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
53738987b35SRichard Tran Mills   if (!a->nz) {
5389566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5399566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, m));
5409566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
54138987b35SRichard Tran Mills     PetscFunctionReturn(0);
54238987b35SRichard Tran Mills   }
543df555b71SRichard Tran Mills 
5449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
546df555b71SRichard Tran Mills 
5473fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5483fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5493fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5509566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
5519566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5523fa15762SRichard Tran Mills 
553df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
554df555b71SRichard Tran Mills   if (zz == yy) {
555df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
556df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
557792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
558df555b71SRichard Tran Mills   } else {
559df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
560df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
561792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
5625f80ce2aSJacob Faibussowitsch     for (i = 0; i < m; i++) z[i] += y[i];
563df555b71SRichard Tran Mills   }
564df555b71SRichard Tran Mills 
5659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
568df555b71SRichard Tran Mills   PetscFunctionReturn(0);
569df555b71SRichard Tran Mills }
570d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
571df555b71SRichard Tran Mills 
572e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
5739371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
574ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
575ff03dc53SRichard Tran Mills   const PetscScalar *x;
576ff03dc53SRichard Tran Mills   PetscScalar       *y, *z;
577ff03dc53SRichard Tran Mills   const MatScalar   *aa;
578ff03dc53SRichard Tran Mills   PetscInt           m = A->rmap->n;
579db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
580ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
581ff03dc53SRichard Tran Mills   PetscInt           i;
582ff03dc53SRichard Tran Mills 
583ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
584ff03dc53SRichard Tran Mills   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
585a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
586db63039fSRichard Tran Mills   PetscScalar beta;
587a84739b8SRichard Tran Mills   char        matdescra[6];
5884a2a386eSRichard Tran Mills 
5894a2a386eSRichard Tran Mills   PetscFunctionBegin;
590a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
591a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
592a84739b8SRichard Tran Mills 
5939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5954a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
5964a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
5974a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
5984a2a386eSRichard Tran Mills 
5994a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
600a84739b8SRichard Tran Mills   if (zz == yy) {
601a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
602db63039fSRichard Tran Mills     beta = 1.0;
603969800c5SRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
604a84739b8SRichard Tran Mills   } else {
605db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
606db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
607db63039fSRichard Tran Mills     beta = 0.0;
608db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
6099371c9d4SSatish Balay     for (i = 0; i < n; i++) { z[i] += y[i]; }
610a84739b8SRichard Tran Mills   }
6114a2a386eSRichard Tran Mills 
6129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6154a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6164a2a386eSRichard Tran Mills }
6171950a7e7SRichard Tran Mills #endif
6184a2a386eSRichard Tran Mills 
619ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
6209371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
621df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
622df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
623df555b71SRichard Tran Mills   const PetscScalar *x;
624df555b71SRichard Tran Mills   PetscScalar       *y, *z;
625969800c5SRichard Tran Mills   PetscInt           n = A->cmap->n;
626df555b71SRichard Tran Mills   PetscInt           i;
627551aa5c8SRichard Tran Mills   PetscObjectState   state;
628df555b71SRichard Tran Mills 
629df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
630df555b71SRichard Tran Mills 
631df555b71SRichard Tran Mills   PetscFunctionBegin;
632df555b71SRichard Tran Mills 
63338987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
63438987b35SRichard Tran Mills   if (!a->nz) {
6359566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6369566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, n));
6379566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
63838987b35SRichard Tran Mills     PetscFunctionReturn(0);
63938987b35SRichard Tran Mills   }
640f36dfe3fSRichard Tran Mills 
6419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
643df555b71SRichard Tran Mills 
6443fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6453fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6463fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6479566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6485f80ce2aSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
6493fa15762SRichard Tran Mills 
650df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
651df555b71SRichard Tran Mills   if (zz == yy) {
652df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
653df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
654792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
655df555b71SRichard Tran Mills   } else {
656df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
657df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
658792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
6595f80ce2aSJacob Faibussowitsch     for (i = 0; i < n; i++) z[i] += y[i];
660df555b71SRichard Tran Mills   }
661df555b71SRichard Tran Mills 
6629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
665df555b71SRichard Tran Mills   PetscFunctionReturn(0);
666df555b71SRichard Tran Mills }
667d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668df555b71SRichard Tran Mills 
669190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
6708a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
6719371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
6721495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
673431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
674190ae7a4SRichard Tran Mills   PetscInt            nrows, ncols;
675431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
676431879ecSRichard Tran Mills   PetscObjectState    state;
677431879ecSRichard Tran Mills 
678431879ecSRichard Tran Mills   PetscFunctionBegin;
679190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
680190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
681190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
682190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
683190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
684190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
685190ae7a4SRichard Tran Mills 
6869566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6879566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6889566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
6899566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
690431879ecSRichard Tran Mills   csrA                = a->csrA;
691431879ecSRichard Tran Mills   csrB                = b->csrA;
692431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
693431879ecSRichard Tran Mills 
694190ae7a4SRichard Tran Mills   if (csrA && csrB) {
695792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
696190ae7a4SRichard Tran Mills   } else {
697190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
698190ae7a4SRichard Tran Mills   }
699190ae7a4SRichard Tran Mills 
7009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
701431879ecSRichard Tran Mills 
702431879ecSRichard Tran Mills   PetscFunctionReturn(0);
703431879ecSRichard Tran Mills }
704431879ecSRichard Tran Mills 
7059371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
7061495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
707e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
708e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
709e8be1fc7SRichard Tran Mills   PetscObjectState    state;
710e8be1fc7SRichard Tran Mills 
711e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7129566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7139566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7149566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7159566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
716e8be1fc7SRichard Tran Mills   csrA                = a->csrA;
717e8be1fc7SRichard Tran Mills   csrB                = b->csrA;
718e8be1fc7SRichard Tran Mills   csrC                = c->csrA;
719e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720e8be1fc7SRichard Tran Mills 
721190ae7a4SRichard Tran Mills   if (csrA && csrB) {
722792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
723190ae7a4SRichard Tran Mills   } else {
724190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
725190ae7a4SRichard Tran Mills   }
726e8be1fc7SRichard Tran Mills 
727e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7289566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
729e8be1fc7SRichard Tran Mills 
730e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
731e8be1fc7SRichard Tran Mills }
732e8be1fc7SRichard Tran Mills 
7339371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
734190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7359566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
736190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
737190ae7a4SRichard Tran Mills }
738190ae7a4SRichard Tran Mills 
7399371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
740190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7419566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
742190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
743190ae7a4SRichard Tran Mills }
744190ae7a4SRichard Tran Mills 
7459371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
746190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7479566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
748190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
749190ae7a4SRichard Tran Mills }
750190ae7a4SRichard Tran Mills 
7519371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
752190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7539566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
754190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
755190ae7a4SRichard Tran Mills }
756190ae7a4SRichard Tran Mills 
7579371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
758190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7599566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
760190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
761190ae7a4SRichard Tran Mills }
762190ae7a4SRichard Tran Mills 
7639371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
764190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7659566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
766190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
767190ae7a4SRichard Tran Mills }
768190ae7a4SRichard Tran Mills 
7699371c9d4SSatish Balay static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
770190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
771190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
772190ae7a4SRichard Tran Mills 
773190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7749566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
775190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
776190ae7a4SRichard Tran Mills }
777190ae7a4SRichard Tran Mills 
7789371c9d4SSatish Balay static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
779190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
780190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
781190ae7a4SRichard Tran Mills   PetscReal    fill = product->fill;
782190ae7a4SRichard Tran Mills 
783190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7849566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
785190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
786190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
787190ae7a4SRichard Tran Mills }
788190ae7a4SRichard Tran Mills 
7899371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) {
790190ae7a4SRichard Tran Mills   Mat                 Ct;
791190ae7a4SRichard Tran Mills   Vec                 zeros;
7921495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
7934f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
7944f53af40SRichard Tran Mills   PetscBool           set, flag;
795b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
7964f53af40SRichard Tran Mills   PetscObjectState    state;
7974f53af40SRichard Tran Mills 
7984f53af40SRichard Tran Mills   PetscFunctionBegin;
7999566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
800b94d7dedSBarry Smith   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8014f53af40SRichard Tran Mills 
8029566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8039566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8049566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8059566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8064f53af40SRichard Tran Mills   csrA                = a->csrA;
8074f53af40SRichard Tran Mills   csrP                = p->csrA;
8084f53af40SRichard Tran Mills   csrC                = c->csrA;
809b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
810190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
811b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8124f53af40SRichard Tran Mills 
813f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
814792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
8154f53af40SRichard Tran Mills 
816190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
817190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
818190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8197301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
820190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
821190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
822190ae7a4SRichard Tran Mills    * the full matrix. */
8239566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8249566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
8259566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C, &zeros, NULL));
8269566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8279566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8289566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
8299566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
830190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
8319566063dSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C));
8329566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
8339566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8349566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8364f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8374f53af40SRichard Tran Mills }
838190ae7a4SRichard Tran Mills 
8399371c9d4SSatish Balay PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) {
840190ae7a4SRichard Tran Mills   Mat_Product        *product = C->product;
841190ae7a4SRichard Tran Mills   Mat                 A = product->A, P = product->B;
8421495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
843190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
844190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
845190ae7a4SRichard Tran Mills   PetscObjectState    state;
846190ae7a4SRichard Tran Mills 
847190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8499566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8509566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8519566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
852190ae7a4SRichard Tran Mills   csrA                = a->csrA;
853190ae7a4SRichard Tran Mills   csrP                = p->csrA;
854190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
855190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
856190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
857190ae7a4SRichard Tran Mills 
858190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
859190ae7a4SRichard Tran Mills   if (csrP && csrA) {
860792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
861190ae7a4SRichard Tran Mills   } else {
862190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
863190ae7a4SRichard Tran Mills   }
864190ae7a4SRichard Tran Mills 
865190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
866190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
86749ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
86849ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
86949ba5396SRichard Tran Mills    * is guaranteed. */
8709566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
871190ae7a4SRichard Tran Mills 
872190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
873190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
874190ae7a4SRichard Tran Mills }
875190ae7a4SRichard Tran Mills 
8769371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) {
877190ae7a4SRichard Tran Mills   PetscFunctionBegin;
878190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
879190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
880190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
881190ae7a4SRichard Tran Mills }
882190ae7a4SRichard Tran Mills 
8839371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) {
884190ae7a4SRichard Tran Mills   PetscFunctionBegin;
885190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
886190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
887190ae7a4SRichard Tran Mills }
888190ae7a4SRichard Tran Mills 
8899371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) {
890190ae7a4SRichard Tran Mills   PetscFunctionBegin;
891190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
892190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
893190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
894190ae7a4SRichard Tran Mills }
895190ae7a4SRichard Tran Mills 
8969371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) {
897190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
898190ae7a4SRichard Tran Mills   Mat          A       = product->A;
899190ae7a4SRichard Tran Mills   PetscBool    set, flag;
900190ae7a4SRichard Tran Mills 
901190ae7a4SRichard Tran Mills   PetscFunctionBegin;
902a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9032ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9042ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
905190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9062ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
907190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
908a3d67537SPierre Jolivet   } else {
909190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9109566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
911a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
912a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9131495fedeSRichard Tran Mills     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
914190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
915a3d67537SPierre Jolivet   }
916190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
917190ae7a4SRichard Tran Mills }
918190ae7a4SRichard Tran Mills 
9199371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) {
920190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9212ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
922190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
923190ae7a4SRichard Tran Mills }
924190ae7a4SRichard Tran Mills 
9259371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) {
926190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9272ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
928190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
929190ae7a4SRichard Tran Mills }
930190ae7a4SRichard Tran Mills 
9319371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) {
932190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
933190ae7a4SRichard Tran Mills 
934190ae7a4SRichard Tran Mills   PetscFunctionBegin;
935190ae7a4SRichard Tran Mills   switch (product->type) {
9369371c9d4SSatish Balay   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break;
9379371c9d4SSatish Balay   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break;
9389371c9d4SSatish Balay   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break;
9399371c9d4SSatish Balay   case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break;
9409371c9d4SSatish Balay   case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break;
9419371c9d4SSatish Balay   case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break;
9429371c9d4SSatish Balay   default: break;
943190ae7a4SRichard Tran Mills   }
944190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
945190ae7a4SRichard Tran Mills }
946431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
947190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
9484f53af40SRichard Tran Mills 
9494a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
950510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
9514a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
9524a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
9539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
9544a2a386eSRichard Tran Mills   Mat            B = *newmat;
9554a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
956c9d46305SRichard Tran Mills   PetscBool      set;
957e9c94282SRichard Tran Mills   PetscBool      sametype;
9584a2a386eSRichard Tran Mills 
9594a2a386eSRichard Tran Mills   PetscFunctionBegin;
9609566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
9614a2a386eSRichard Tran Mills 
9629566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
963e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
964e9c94282SRichard Tran Mills 
9659566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B, &aijmkl));
9664a2a386eSRichard Tran Mills   B->spptr = (void *)aijmkl;
9674a2a386eSRichard Tran Mills 
968df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
969969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
9704a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
9714a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
9724a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
973c9d46305SRichard Tran Mills 
9744abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
975ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
976d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
977a8327b06SKarl Rupp #else
978d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
979d995685eSRichard Tran Mills #endif
9805b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
9814abfa3b3SRichard Tran Mills 
9824abfa3b3SRichard Tran Mills   /* Parse command line options. */
983d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
9849566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
9859566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
986d0609cedSBarry Smith   PetscOptionsEnd();
987ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
988d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
9899566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
990d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
991d995685eSRichard Tran Mills   }
992d995685eSRichard Tran Mills #endif
993c9d46305SRichard Tran Mills 
994ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
995df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
996969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
997df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
998969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
9998a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1000190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1001190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1002190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1003190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1004190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1005ffcab697SRichard Tran Mills #if !defined(PETSC_USE_COMPLEX)
100649ba5396SRichard Tran Mills   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1007190ae7a4SRichard Tran Mills #else
1008190ae7a4SRichard Tran Mills   B->ops->ptapnumeric = NULL;
10094f53af40SRichard Tran Mills #endif
1010e8be1fc7SRichard Tran Mills #endif
10111950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10121950a7e7SRichard Tran Mills 
1013213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1014213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1015213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1016213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1017213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10181950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10194a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1020969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10214a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1022969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1023c9d46305SRichard Tran Mills   }
10241950a7e7SRichard Tran Mills #endif
10254a2a386eSRichard Tran Mills 
10269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
10274a2a386eSRichard Tran Mills 
10289566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
10294a2a386eSRichard Tran Mills   *newmat = B;
10304a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10314a2a386eSRichard Tran Mills }
10324a2a386eSRichard Tran Mills 
10334a2a386eSRichard Tran Mills /*@C
10344a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
10354a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
10364a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
103790147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
103890147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1039597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1040597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1041597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
104290147e49SRichard Tran Mills 
1043d083f849SBarry Smith    Collective
10444a2a386eSRichard Tran Mills 
10454a2a386eSRichard Tran Mills    Input Parameters:
10464a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
10474a2a386eSRichard Tran Mills .  m - number of rows
10484a2a386eSRichard Tran Mills .  n - number of columns
10494a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
10504a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
10514a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
10524a2a386eSRichard Tran Mills 
10534a2a386eSRichard Tran Mills    Output Parameter:
10544a2a386eSRichard Tran Mills .  A - the matrix
10554a2a386eSRichard Tran Mills 
105690147e49SRichard Tran Mills    Options Database Keys:
105766b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
105866b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
105990147e49SRichard Tran Mills 
10604a2a386eSRichard Tran Mills    Notes:
10614a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
10624a2a386eSRichard Tran Mills 
10634a2a386eSRichard Tran Mills    Level: intermediate
10644a2a386eSRichard Tran Mills 
1065db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
10664a2a386eSRichard Tran Mills @*/
10679371c9d4SSatish Balay PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
10684a2a386eSRichard Tran Mills   PetscFunctionBegin;
10699566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
10709566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
10719566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJMKL));
10729566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
10734a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10744a2a386eSRichard Tran Mills }
10754a2a386eSRichard Tran Mills 
10769371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) {
10774a2a386eSRichard Tran Mills   PetscFunctionBegin;
10789566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
10799566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
10804a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10814a2a386eSRichard Tran Mills }
1082