xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 2fe279fdf3e687a416e4eadb7d3c7a82d60442c6)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
13bdfea6dbSBarry Smith   #define MKL_ILP64
14bdfea6dbSBarry Smith #endif
15b9e7e5c1SBarry Smith #include <mkl_spblas.h>
164a2a386eSRichard Tran Mills 
174a2a386eSRichard Tran Mills typedef struct {
18c9d46305SRichard Tran Mills   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
195b49642aSRichard Tran Mills   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
204abfa3b3SRichard Tran Mills   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
21551aa5c8SRichard Tran Mills   PetscObjectState state;
22ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
23df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
24df555b71SRichard Tran Mills   struct matrix_descr descr;
25b8cbc1fbSRichard Tran Mills #endif
264a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
274a2a386eSRichard Tran Mills 
284a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
294a2a386eSRichard Tran Mills 
30d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
31d71ae5a4SJacob Faibussowitsch {
324a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
334a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
344a2a386eSRichard Tran Mills   Mat B = *newmat;
35ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
364a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
37c1d5218aSRichard Tran Mills #endif
384a2a386eSRichard Tran Mills 
394a2a386eSRichard Tran Mills   PetscFunctionBegin;
409566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
554f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
584222ddf1SHong Zhang 
59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
604abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
62e9c94282SRichard Tran Mills    * the spptr pointer. */
63a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
64a8327b06SKarl Rupp 
65792fecdfSBarry Smith   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
66ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
679566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
684a2a386eSRichard Tran Mills 
694a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
709566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
714a2a386eSRichard Tran Mills 
724a2a386eSRichard Tran Mills   *newmat = B;
733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
744a2a386eSRichard Tran Mills }
754a2a386eSRichard Tran Mills 
76d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
77d71ae5a4SJacob Faibussowitsch {
784a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
794a2a386eSRichard Tran Mills 
804a2a386eSRichard Tran Mills   PetscFunctionBegin;
81e9c94282SRichard Tran Mills 
82edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
83e9c94282SRichard Tran Mills   if (aijmkl) {
844a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
85ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
86792fecdfSBarry Smith     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
874abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
889566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
89e9c94282SRichard Tran Mills   }
904a2a386eSRichard Tran Mills 
914a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
924a2a386eSRichard Tran Mills    * to destroy everything that remains. */
939566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
94*2fe279fdSBarry Smith   /* I don't call MatSetType().  I believe this is because that
954a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
964a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
972e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
989566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1004a2a386eSRichard Tran Mills }
1014a2a386eSRichard Tran Mills 
102190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1035b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1045b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1055b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1065b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1075b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1085b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
109d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
110d71ae5a4SJacob Faibussowitsch {
111ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1126e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1136e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1146e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11545fbe478SRichard Tran Mills   PetscFunctionBegin;
1163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1176e369cd5SRichard Tran Mills #else
118a8327b06SKarl Rupp   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
119a8327b06SKarl Rupp   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
120a8327b06SKarl Rupp   PetscInt       m, n;
121a8327b06SKarl Rupp   MatScalar     *aa;
122a8327b06SKarl Rupp   PetscInt      *aj, *ai;
1234a2a386eSRichard Tran Mills 
124a8327b06SKarl Rupp   PetscFunctionBegin;
125e626a176SRichard Tran Mills   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
126e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
127e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
128e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
129e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1303ba16761SJacob Faibussowitsch   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
1314d51fa23SRichard Tran Mills   #endif
1326e369cd5SRichard Tran Mills 
1330632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1340632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1350632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
136792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
1370632b357SRichard Tran Mills   }
1388d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1396e369cd5SRichard Tran Mills 
140c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
141df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
142df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
143df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14458678438SRichard Tran Mills   m                  = A->rmap->n;
14558678438SRichard Tran Mills   n                  = A->cmap->n;
146df555b71SRichard Tran Mills   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
147df555b71SRichard Tran Mills   aa                 = a->a; /* Nonzero elements stored row-by-row. */
148df555b71SRichard Tran Mills   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
1491495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1508d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1518d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
152bdfea6dbSBarry Smith     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
153792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
154792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
15548a46eb9SPierre Jolivet     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
1564abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
1579566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
158190ae7a4SRichard Tran Mills   } else {
159f3fa974cSJacob Faibussowitsch     aijmkl->csrA = NULL;
160c9d46305SRichard Tran Mills   }
1616e369cd5SRichard Tran Mills 
1623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
163d995685eSRichard Tran Mills #endif
1646e369cd5SRichard Tran Mills }
1656e369cd5SRichard Tran Mills 
166b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
167190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
168d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
169d71ae5a4SJacob Faibussowitsch {
17019afcda9SRichard Tran Mills   sparse_index_base_t indexing;
171190ae7a4SRichard Tran Mills   PetscInt            m, n;
17245fbe478SRichard Tran Mills   PetscInt           *aj, *ai, *dummy;
17319afcda9SRichard Tran Mills   MatScalar          *aa;
17419afcda9SRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl;
17519afcda9SRichard Tran Mills 
176362febeeSStefano Zampini   PetscFunctionBegin;
177190ae7a4SRichard Tran Mills   if (csrA) {
17845fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
179bdfea6dbSBarry Smith     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
1805f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
181190ae7a4SRichard Tran Mills   } else {
182f3fa974cSJacob Faibussowitsch     aj = ai = NULL;
183f3fa974cSJacob Faibussowitsch     aa      = NULL;
184aab60f1bSRichard Tran Mills   }
185190ae7a4SRichard Tran Mills 
1869566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
1879566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
188aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
189aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
190aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
191aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
192190ae7a4SRichard Tran Mills   if (csrA) {
1939566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
194190ae7a4SRichard Tran Mills   } else {
195190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1969566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1979566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1989566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
199190ae7a4SRichard Tran Mills   }
20019afcda9SRichard Tran Mills 
20119afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
20219afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
2039566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
2046c87cf42SRichard Tran Mills 
20519afcda9SRichard Tran Mills   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
20619afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2076c87cf42SRichard Tran Mills 
20819afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20919afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
21019afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
211f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
212f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
213f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
214190ae7a4SRichard Tran Mills   if (csrA) {
215792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
216792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
2171950a7e7SRichard Tran Mills   }
2189566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
2193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22019afcda9SRichard Tran Mills }
221b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
222190ae7a4SRichard Tran Mills 
223e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
224e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
225e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
226b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
227d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
228d71ae5a4SJacob Faibussowitsch {
229e8be1fc7SRichard Tran Mills   PetscInt            i;
230e8be1fc7SRichard Tran Mills   PetscInt            nrows, ncols;
231e8be1fc7SRichard Tran Mills   PetscInt            nz;
232e8be1fc7SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
233e8be1fc7SRichard Tran Mills   PetscScalar        *aa;
2341495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
235e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
236e8be1fc7SRichard Tran Mills 
237362febeeSStefano Zampini   PetscFunctionBegin;
238190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2393ba16761SJacob Faibussowitsch   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
240190ae7a4SRichard Tran Mills 
241e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
242bdfea6dbSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
243e8be1fc7SRichard Tran Mills 
244e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
245e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
246e8be1fc7SRichard Tran Mills   for (i = 0; i < nrows; i++) {
247e8be1fc7SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2489566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
249e8be1fc7SRichard Tran Mills   }
250e8be1fc7SRichard Tran Mills 
2519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
253e8be1fc7SRichard Tran Mills 
2549566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
255a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
256a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
257a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
258a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
2593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
260e8be1fc7SRichard Tran Mills }
261b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
262e8be1fc7SRichard Tran Mills 
2633849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
264d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
265d71ae5a4SJacob Faibussowitsch {
2663849ddb2SRichard Tran Mills   PetscInt            i, j, k;
2673849ddb2SRichard Tran Mills   PetscInt            nrows, ncols;
2683849ddb2SRichard Tran Mills   PetscInt            nz;
2693849ddb2SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
2703849ddb2SRichard Tran Mills   PetscScalar        *aa;
2711495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2723849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2733849ddb2SRichard Tran Mills 
274362febeeSStefano Zampini   PetscFunctionBegin;
2759566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2763849ddb2SRichard Tran Mills 
2773849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2783849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2799566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
2803ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2813849ddb2SRichard Tran Mills   }
2823849ddb2SRichard Tran Mills 
2833849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
284bdfea6dbSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
2853849ddb2SRichard Tran Mills 
2863849ddb2SRichard Tran Mills   k = 0;
2873849ddb2SRichard Tran Mills   for (i = 0; i < nrows; i++) {
2889566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
2893849ddb2SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2903849ddb2SRichard Tran Mills     for (j = 0; j < nz; j++) {
2913849ddb2SRichard Tran Mills       if (aa) {
2929566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
2933849ddb2SRichard Tran Mills       } else {
2949566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
2953849ddb2SRichard Tran Mills       }
2963849ddb2SRichard Tran Mills       k++;
2973849ddb2SRichard Tran Mills     }
2989566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
2993849ddb2SRichard Tran Mills   }
3003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3013849ddb2SRichard Tran Mills }
3023849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3033849ddb2SRichard Tran Mills 
304d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
305d71ae5a4SJacob Faibussowitsch {
3061495fedeSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3076e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
3086e369cd5SRichard Tran Mills 
3096e369cd5SRichard Tran Mills   PetscFunctionBegin;
3109566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
3116e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
3129566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
3136e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3141baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3166e369cd5SRichard Tran Mills }
3176e369cd5SRichard Tran Mills 
318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
319d71ae5a4SJacob Faibussowitsch {
3206e369cd5SRichard Tran Mills   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
3215b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
3226e369cd5SRichard Tran Mills 
3236e369cd5SRichard Tran Mills   PetscFunctionBegin;
3243ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
3256e369cd5SRichard Tran Mills 
3266e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3276e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3286e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3296e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
330d96e85feSRichard Tran Mills    * a lot of code duplication. */
3316e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
3329566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3336e369cd5SRichard Tran Mills 
3345b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3355b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3365b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3371baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
338df555b71SRichard Tran Mills 
3393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3404a2a386eSRichard Tran Mills }
3414a2a386eSRichard Tran Mills 
342e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
343d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
344d71ae5a4SJacob Faibussowitsch {
3454a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3464a2a386eSRichard Tran Mills   const PetscScalar *x;
3474a2a386eSRichard Tran Mills   PetscScalar       *y;
3484a2a386eSRichard Tran Mills   const MatScalar   *aa;
3494a2a386eSRichard Tran Mills   PetscInt           m     = A->rmap->n;
350db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
351db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
352db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
3534a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
354db63039fSRichard Tran Mills   char               matdescra[6];
355db63039fSRichard Tran Mills 
3564a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
357ff03dc53SRichard Tran Mills   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
358ff03dc53SRichard Tran Mills 
359ff03dc53SRichard Tran Mills   PetscFunctionBegin;
360db63039fSRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
361db63039fSRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
3629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
364ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
365ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
366ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
367ff03dc53SRichard Tran Mills 
368ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
369db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
370ff03dc53SRichard Tran Mills 
3719566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
3743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
375ff03dc53SRichard Tran Mills }
3761950a7e7SRichard Tran Mills #endif
377ff03dc53SRichard Tran Mills 
378ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
379d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
380d71ae5a4SJacob Faibussowitsch {
381df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
382df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
383df555b71SRichard Tran Mills   const PetscScalar *x;
384df555b71SRichard Tran Mills   PetscScalar       *y;
385551aa5c8SRichard Tran Mills   PetscObjectState   state;
386df555b71SRichard Tran Mills 
387df555b71SRichard Tran Mills   PetscFunctionBegin;
388df555b71SRichard Tran Mills 
38938987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
39038987b35SRichard Tran Mills   if (!a->nz) {
3919566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
3929566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->rmap->n));
3939566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
3943ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
39538987b35SRichard Tran Mills   }
396f36dfe3fSRichard Tran Mills 
3979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
399df555b71SRichard Tran Mills 
4003fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4013fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4023fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4039566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4049566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4053fa15762SRichard Tran Mills 
406df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
407792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
408df555b71SRichard Tran Mills 
4099566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
413df555b71SRichard Tran Mills }
414d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
415df555b71SRichard Tran Mills 
416e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
418d71ae5a4SJacob Faibussowitsch {
419ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
420ff03dc53SRichard Tran Mills   const PetscScalar *x;
421ff03dc53SRichard Tran Mills   PetscScalar       *y;
422ff03dc53SRichard Tran Mills   const MatScalar   *aa;
423ff03dc53SRichard Tran Mills   PetscInt           m     = A->rmap->n;
424db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
425db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
426db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
427ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
428db63039fSRichard Tran Mills   char               matdescra[6];
429ff03dc53SRichard Tran Mills 
430ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
431ff03dc53SRichard Tran Mills   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
4324a2a386eSRichard Tran Mills 
4334a2a386eSRichard Tran Mills   PetscFunctionBegin;
434969800c5SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
435969800c5SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
4369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4379566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4384a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
4394a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
4404a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
4414a2a386eSRichard Tran Mills 
4424a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
443db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
4444a2a386eSRichard Tran Mills 
4459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4494a2a386eSRichard Tran Mills }
4501950a7e7SRichard Tran Mills #endif
4514a2a386eSRichard Tran Mills 
452ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
453d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
454d71ae5a4SJacob Faibussowitsch {
455df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
456df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
457df555b71SRichard Tran Mills   const PetscScalar *x;
458df555b71SRichard Tran Mills   PetscScalar       *y;
459551aa5c8SRichard Tran Mills   PetscObjectState   state;
460df555b71SRichard Tran Mills 
461df555b71SRichard Tran Mills   PetscFunctionBegin;
462df555b71SRichard Tran Mills 
46338987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
46438987b35SRichard Tran Mills   if (!a->nz) {
4659566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
4669566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->cmap->n));
4679566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
4683ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
46938987b35SRichard Tran Mills   }
470f36dfe3fSRichard Tran Mills 
4719566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
473df555b71SRichard Tran Mills 
4743fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4753fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4763fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4779566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4789566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4793fa15762SRichard Tran Mills 
480df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
481792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
482df555b71SRichard Tran Mills 
4839566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
487df555b71SRichard Tran Mills }
488d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
489df555b71SRichard Tran Mills 
490e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
491d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
492d71ae5a4SJacob Faibussowitsch {
4934a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
4944a2a386eSRichard Tran Mills   const PetscScalar *x;
4954a2a386eSRichard Tran Mills   PetscScalar       *y, *z;
4964a2a386eSRichard Tran Mills   const MatScalar   *aa;
4974a2a386eSRichard Tran Mills   PetscInt           m = A->rmap->n;
498db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
4994a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
5004a2a386eSRichard Tran Mills   PetscInt           i;
5014a2a386eSRichard Tran Mills 
502ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
503ff03dc53SRichard Tran Mills   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
504a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
505db63039fSRichard Tran Mills   PetscScalar beta;
506a84739b8SRichard Tran Mills   char        matdescra[6];
507ff03dc53SRichard Tran Mills 
508ff03dc53SRichard Tran Mills   PetscFunctionBegin;
509a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
510a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
511a84739b8SRichard Tran Mills 
5129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
514ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
515ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
516ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
517ff03dc53SRichard Tran Mills 
518ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
519a84739b8SRichard Tran Mills   if (zz == yy) {
520a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
521db63039fSRichard Tran Mills     beta = 1.0;
522db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
523a84739b8SRichard Tran Mills   } else {
524db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
525db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
526db63039fSRichard Tran Mills     beta = 0.0;
527db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
528ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
529a84739b8SRichard Tran Mills   }
530ff03dc53SRichard Tran Mills 
5319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
535ff03dc53SRichard Tran Mills }
5361950a7e7SRichard Tran Mills #endif
537ff03dc53SRichard Tran Mills 
538ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
539d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
540d71ae5a4SJacob Faibussowitsch {
541df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
542df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
543df555b71SRichard Tran Mills   const PetscScalar *x;
544df555b71SRichard Tran Mills   PetscScalar       *y, *z;
545df555b71SRichard Tran Mills   PetscInt           m = A->rmap->n;
546df555b71SRichard Tran Mills   PetscInt           i;
547df555b71SRichard Tran Mills 
548df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
549551aa5c8SRichard Tran Mills   PetscObjectState state;
550df555b71SRichard Tran Mills 
551df555b71SRichard Tran Mills   PetscFunctionBegin;
552df555b71SRichard Tran Mills 
55338987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
55438987b35SRichard Tran Mills   if (!a->nz) {
5559566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5569566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, m));
5579566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5583ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
55938987b35SRichard Tran Mills   }
560df555b71SRichard Tran Mills 
5619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
563df555b71SRichard Tran Mills 
5643fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5653fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5663fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5679566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
5689566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5693fa15762SRichard Tran Mills 
570df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
571df555b71SRichard Tran Mills   if (zz == yy) {
572df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
573df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
574792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
575df555b71SRichard Tran Mills   } else {
576df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
577df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
578792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
5795f80ce2aSJacob Faibussowitsch     for (i = 0; i < m; i++) z[i] += y[i];
580df555b71SRichard Tran Mills   }
581df555b71SRichard Tran Mills 
5829566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
586df555b71SRichard Tran Mills }
587d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
588df555b71SRichard Tran Mills 
589e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
590d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
591d71ae5a4SJacob Faibussowitsch {
592ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
593ff03dc53SRichard Tran Mills   const PetscScalar *x;
594ff03dc53SRichard Tran Mills   PetscScalar       *y, *z;
595ff03dc53SRichard Tran Mills   const MatScalar   *aa;
596ff03dc53SRichard Tran Mills   PetscInt           m = A->rmap->n;
597db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
598ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
599ff03dc53SRichard Tran Mills   PetscInt           i;
600ff03dc53SRichard Tran Mills 
601ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
602ff03dc53SRichard Tran Mills   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
603a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
604db63039fSRichard Tran Mills   PetscScalar beta;
605a84739b8SRichard Tran Mills   char        matdescra[6];
6064a2a386eSRichard Tran Mills 
6074a2a386eSRichard Tran Mills   PetscFunctionBegin;
608a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
609a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
610a84739b8SRichard Tran Mills 
6119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6134a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
6144a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
6154a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
6164a2a386eSRichard Tran Mills 
6174a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
618a84739b8SRichard Tran Mills   if (zz == yy) {
619a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
620db63039fSRichard Tran Mills     beta = 1.0;
621969800c5SRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
622a84739b8SRichard Tran Mills   } else {
623db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
624db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
625db63039fSRichard Tran Mills     beta = 0.0;
626db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
627ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
628a84739b8SRichard Tran Mills   }
6294a2a386eSRichard Tran Mills 
6309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6344a2a386eSRichard Tran Mills }
6351950a7e7SRichard Tran Mills #endif
6364a2a386eSRichard Tran Mills 
637ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
638d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
639d71ae5a4SJacob Faibussowitsch {
640df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
641df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
642df555b71SRichard Tran Mills   const PetscScalar *x;
643df555b71SRichard Tran Mills   PetscScalar       *y, *z;
644969800c5SRichard Tran Mills   PetscInt           n = A->cmap->n;
645df555b71SRichard Tran Mills   PetscInt           i;
646551aa5c8SRichard Tran Mills   PetscObjectState   state;
647df555b71SRichard Tran Mills 
648df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
649df555b71SRichard Tran Mills 
650df555b71SRichard Tran Mills   PetscFunctionBegin;
651df555b71SRichard Tran Mills 
65238987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
65338987b35SRichard Tran Mills   if (!a->nz) {
6549566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6559566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, n));
6569566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6573ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
65838987b35SRichard Tran Mills   }
659f36dfe3fSRichard Tran Mills 
6609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
662df555b71SRichard Tran Mills 
6633fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6643fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6653fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6669566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6673ba16761SJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6683fa15762SRichard Tran Mills 
669df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
670df555b71SRichard Tran Mills   if (zz == yy) {
671df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
672df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
673792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
674df555b71SRichard Tran Mills   } else {
675df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
676df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
677792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
6785f80ce2aSJacob Faibussowitsch     for (i = 0; i < n; i++) z[i] += y[i];
679df555b71SRichard Tran Mills   }
680df555b71SRichard Tran Mills 
6819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
685df555b71SRichard Tran Mills }
686d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
687df555b71SRichard Tran Mills 
6888a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
689d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
690d71ae5a4SJacob Faibussowitsch {
6911495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
692431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
693190ae7a4SRichard Tran Mills   PetscInt            nrows, ncols;
694431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
695431879ecSRichard Tran Mills   PetscObjectState    state;
696431879ecSRichard Tran Mills 
697431879ecSRichard Tran Mills   PetscFunctionBegin;
698190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
699190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
700190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
701190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
702190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
703190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
704190ae7a4SRichard Tran Mills 
7059566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7069566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7079566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7089566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
709431879ecSRichard Tran Mills   csrA                = a->csrA;
710431879ecSRichard Tran Mills   csrB                = b->csrA;
711431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
712431879ecSRichard Tran Mills 
713190ae7a4SRichard Tran Mills   if (csrA && csrB) {
714792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
715190ae7a4SRichard Tran Mills   } else {
716f3fa974cSJacob Faibussowitsch     csrC = NULL;
717190ae7a4SRichard Tran Mills   }
718190ae7a4SRichard Tran Mills 
7199566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
720431879ecSRichard Tran Mills 
7213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
722431879ecSRichard Tran Mills }
723431879ecSRichard Tran Mills 
724d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
725d71ae5a4SJacob Faibussowitsch {
7261495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
727e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
728e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
729e8be1fc7SRichard Tran Mills   PetscObjectState    state;
730e8be1fc7SRichard Tran Mills 
731e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7329566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7339566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7349566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7359566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
736e8be1fc7SRichard Tran Mills   csrA                = a->csrA;
737e8be1fc7SRichard Tran Mills   csrB                = b->csrA;
738e8be1fc7SRichard Tran Mills   csrC                = c->csrA;
739e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
740e8be1fc7SRichard Tran Mills 
741190ae7a4SRichard Tran Mills   if (csrA && csrB) {
742792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
743190ae7a4SRichard Tran Mills   } else {
744f3fa974cSJacob Faibussowitsch     csrC = NULL;
745190ae7a4SRichard Tran Mills   }
746e8be1fc7SRichard Tran Mills 
747e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
749e8be1fc7SRichard Tran Mills 
7503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
751e8be1fc7SRichard Tran Mills }
752e8be1fc7SRichard Tran Mills 
753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
754d71ae5a4SJacob Faibussowitsch {
755190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7569566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
758190ae7a4SRichard Tran Mills }
759190ae7a4SRichard Tran Mills 
760d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
761d71ae5a4SJacob Faibussowitsch {
762190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7639566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
765190ae7a4SRichard Tran Mills }
766190ae7a4SRichard Tran Mills 
767d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
768d71ae5a4SJacob Faibussowitsch {
769190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7709566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
772190ae7a4SRichard Tran Mills }
773190ae7a4SRichard Tran Mills 
774d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
775d71ae5a4SJacob Faibussowitsch {
776190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7779566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
779190ae7a4SRichard Tran Mills }
780190ae7a4SRichard Tran Mills 
781d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
782d71ae5a4SJacob Faibussowitsch {
783190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7849566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
786190ae7a4SRichard Tran Mills }
787190ae7a4SRichard Tran Mills 
788d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
789d71ae5a4SJacob Faibussowitsch {
790190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7919566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
793190ae7a4SRichard Tran Mills }
794190ae7a4SRichard Tran Mills 
795d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796d71ae5a4SJacob Faibussowitsch {
797190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
798190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
799190ae7a4SRichard Tran Mills 
800190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8019566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
8023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
803190ae7a4SRichard Tran Mills }
804190ae7a4SRichard Tran Mills 
805d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
806d71ae5a4SJacob Faibussowitsch {
807190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
808190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
809190ae7a4SRichard Tran Mills   PetscReal    fill = product->fill;
810190ae7a4SRichard Tran Mills 
811190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8129566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
813190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
8143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
815190ae7a4SRichard Tran Mills }
816190ae7a4SRichard Tran Mills 
817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
818d71ae5a4SJacob Faibussowitsch {
819190ae7a4SRichard Tran Mills   Mat                 Ct;
820190ae7a4SRichard Tran Mills   Vec                 zeros;
8211495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
8224f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8234f53af40SRichard Tran Mills   PetscBool           set, flag;
824b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8254f53af40SRichard Tran Mills   PetscObjectState    state;
8264f53af40SRichard Tran Mills 
8274f53af40SRichard Tran Mills   PetscFunctionBegin;
8289566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
829b94d7dedSBarry Smith   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8304f53af40SRichard Tran Mills 
8319566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8329566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8339566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8349566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8354f53af40SRichard Tran Mills   csrA                = a->csrA;
8364f53af40SRichard Tran Mills   csrP                = p->csrA;
8374f53af40SRichard Tran Mills   csrC                = c->csrA;
838b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
839190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
840b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8414f53af40SRichard Tran Mills 
842*2fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
843792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
8444f53af40SRichard Tran Mills 
845190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
846190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
847190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8487301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
849190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
850190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
851190ae7a4SRichard Tran Mills    * the full matrix. */
8529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8539566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
8549566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C, &zeros, NULL));
8559566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8569566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8579566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
8589566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
859190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
860f3fa974cSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
8619566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
8629566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8639566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8664f53af40SRichard Tran Mills }
867190ae7a4SRichard Tran Mills 
868d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
869d71ae5a4SJacob Faibussowitsch {
870190ae7a4SRichard Tran Mills   Mat_Product        *product = C->product;
871190ae7a4SRichard Tran Mills   Mat                 A = product->A, P = product->B;
8721495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
873190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
874190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
875190ae7a4SRichard Tran Mills   PetscObjectState    state;
876190ae7a4SRichard Tran Mills 
877190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8789566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8799566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8809566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8819566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
882190ae7a4SRichard Tran Mills   csrA                = a->csrA;
883190ae7a4SRichard Tran Mills   csrP                = p->csrA;
884190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
885190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
886190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
887190ae7a4SRichard Tran Mills 
888*2fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
889190ae7a4SRichard Tran Mills   if (csrP && csrA) {
890792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
891190ae7a4SRichard Tran Mills   } else {
892f3fa974cSJacob Faibussowitsch     csrC = NULL;
893190ae7a4SRichard Tran Mills   }
894190ae7a4SRichard Tran Mills 
895190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
896190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
89749ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
89849ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
89949ba5396SRichard Tran Mills    * is guaranteed. */
9009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
901190ae7a4SRichard Tran Mills 
902190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
9033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
904190ae7a4SRichard Tran Mills }
905190ae7a4SRichard Tran Mills 
906d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
907d71ae5a4SJacob Faibussowitsch {
908190ae7a4SRichard Tran Mills   PetscFunctionBegin;
909190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
910190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
9113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
912190ae7a4SRichard Tran Mills }
913190ae7a4SRichard Tran Mills 
914d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
915d71ae5a4SJacob Faibussowitsch {
916190ae7a4SRichard Tran Mills   PetscFunctionBegin;
917190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
9183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
919190ae7a4SRichard Tran Mills }
920190ae7a4SRichard Tran Mills 
921d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
922d71ae5a4SJacob Faibussowitsch {
923190ae7a4SRichard Tran Mills   PetscFunctionBegin;
924190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
925190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
9263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
927190ae7a4SRichard Tran Mills }
928190ae7a4SRichard Tran Mills 
929d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
930d71ae5a4SJacob Faibussowitsch {
931190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
932190ae7a4SRichard Tran Mills   Mat          A       = product->A;
933190ae7a4SRichard Tran Mills   PetscBool    set, flag;
934190ae7a4SRichard Tran Mills 
935190ae7a4SRichard Tran Mills   PetscFunctionBegin;
936a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9372ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9382ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
939190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9402ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
941190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
942a3d67537SPierre Jolivet   } else {
943190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9449566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
945a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
946a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947*2fe279fdSBarry Smith     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
948190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
949a3d67537SPierre Jolivet   }
9503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
951190ae7a4SRichard Tran Mills }
952190ae7a4SRichard Tran Mills 
953d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
954d71ae5a4SJacob Faibussowitsch {
955190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9562ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
958190ae7a4SRichard Tran Mills }
959190ae7a4SRichard Tran Mills 
960d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
961d71ae5a4SJacob Faibussowitsch {
962190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9632ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
965190ae7a4SRichard Tran Mills }
966190ae7a4SRichard Tran Mills 
967d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
968d71ae5a4SJacob Faibussowitsch {
969190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
970190ae7a4SRichard Tran Mills 
971190ae7a4SRichard Tran Mills   PetscFunctionBegin;
972190ae7a4SRichard Tran Mills   switch (product->type) {
973d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
974d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
975d71ae5a4SJacob Faibussowitsch     break;
976d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
977d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
978d71ae5a4SJacob Faibussowitsch     break;
979d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
980d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
981d71ae5a4SJacob Faibussowitsch     break;
982d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_PtAP:
983d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
984d71ae5a4SJacob Faibussowitsch     break;
985d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_RARt:
986d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
987d71ae5a4SJacob Faibussowitsch     break;
988d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABC:
989d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
990d71ae5a4SJacob Faibussowitsch     break;
991d71ae5a4SJacob Faibussowitsch   default:
992d71ae5a4SJacob Faibussowitsch     break;
993190ae7a4SRichard Tran Mills   }
9943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
995190ae7a4SRichard Tran Mills }
996431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
9974f53af40SRichard Tran Mills 
9984a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
999510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
10004a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
10014a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
1002d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1003d71ae5a4SJacob Faibussowitsch {
10044a2a386eSRichard Tran Mills   Mat            B = *newmat;
10054a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
1006c9d46305SRichard Tran Mills   PetscBool      set;
1007e9c94282SRichard Tran Mills   PetscBool      sametype;
10084a2a386eSRichard Tran Mills 
10094a2a386eSRichard Tran Mills   PetscFunctionBegin;
10109566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
10114a2a386eSRichard Tran Mills 
10129566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
10133ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1014e9c94282SRichard Tran Mills 
10154dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijmkl));
10164a2a386eSRichard Tran Mills   B->spptr = (void *)aijmkl;
10174a2a386eSRichard Tran Mills 
1018df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1019969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
10204a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
10214a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
10224a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1023c9d46305SRichard Tran Mills 
10244abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1025ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1026d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1027a8327b06SKarl Rupp #else
1028d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1029d995685eSRichard Tran Mills #endif
10305b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10314abfa3b3SRichard Tran Mills 
10324abfa3b3SRichard Tran Mills   /* Parse command line options. */
1033d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
10349566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
10359566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1036d0609cedSBarry Smith   PetscOptionsEnd();
1037ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1038d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10399566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1040d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1041d995685eSRichard Tran Mills   }
1042d995685eSRichard Tran Mills #endif
1043c9d46305SRichard Tran Mills 
1044ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1045df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1046969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1047df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1048969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10498a369200SRichard Tran Mills   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1050190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1051190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1052190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1055ffcab697SRichard Tran Mills     #if !defined(PETSC_USE_COMPLEX)
105649ba5396SRichard Tran Mills   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1057190ae7a4SRichard Tran Mills     #else
1058190ae7a4SRichard Tran Mills   B->ops->ptapnumeric = NULL;
10594f53af40SRichard Tran Mills     #endif
1060e8be1fc7SRichard Tran Mills   #endif
10611950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10621950a7e7SRichard Tran Mills 
1063213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1064213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1065213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1066213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1067213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10681950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10694a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1070969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10714a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1072969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1073c9d46305SRichard Tran Mills   }
10741950a7e7SRichard Tran Mills #endif
10754a2a386eSRichard Tran Mills 
10769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
10774a2a386eSRichard Tran Mills 
10789566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
10794a2a386eSRichard Tran Mills   *newmat = B;
10803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10814a2a386eSRichard Tran Mills }
10824a2a386eSRichard Tran Mills 
10834a2a386eSRichard Tran Mills /*@C
108411a5261eSBarry Smith    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
108590147e49SRichard Tran Mills 
1086d083f849SBarry Smith    Collective
10874a2a386eSRichard Tran Mills 
10884a2a386eSRichard Tran Mills    Input Parameters:
108911a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
10904a2a386eSRichard Tran Mills .  m - number of rows
10914a2a386eSRichard Tran Mills .  n - number of columns
10924a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
10934a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
10942ef1f0ffSBarry Smith          (possibly different for each row) or `NULL`
10954a2a386eSRichard Tran Mills 
10964a2a386eSRichard Tran Mills    Output Parameter:
10974a2a386eSRichard Tran Mills .  A - the matrix
10984a2a386eSRichard Tran Mills 
109990147e49SRichard Tran Mills    Options Database Keys:
110066b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
11012ef1f0ffSBarry Smith -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
11022ef1f0ffSBarry Smith                                   performing this step the first time the matrix is applied
11034a2a386eSRichard Tran Mills 
11044a2a386eSRichard Tran Mills    Level: intermediate
11054a2a386eSRichard Tran Mills 
1106*2fe279fdSBarry Smith    Notes:
11072ef1f0ffSBarry Smith    If `nnz` is given then `nz` is ignored
11082ef1f0ffSBarry Smith 
1109*2fe279fdSBarry Smith    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1110*2fe279fdSBarry Smith    routines from Intel MKL whenever possible.
1111*2fe279fdSBarry Smith 
1112*2fe279fdSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
1113*2fe279fdSBarry Smith    inspector-executor routines, then those are used by default.
1114*2fe279fdSBarry Smith 
1115*2fe279fdSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1116*2fe279fdSBarry Smith    (for symmetric A) operations are currently supported.
1117*2fe279fdSBarry Smith    MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1118*2fe279fdSBarry Smith 
11192ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
11204a2a386eSRichard Tran Mills @*/
1121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1122d71ae5a4SJacob Faibussowitsch {
11234a2a386eSRichard Tran Mills   PetscFunctionBegin;
11249566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
11259566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
11269566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJMKL));
11279566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
11283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11294a2a386eSRichard Tran Mills }
11304a2a386eSRichard Tran Mills 
1131d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1132d71ae5a4SJacob Faibussowitsch {
11334a2a386eSRichard Tran Mills   PetscFunctionBegin;
11349566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
11359566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
11363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11374a2a386eSRichard Tran Mills }
1138