xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 08401ef684002a709c6d3db98a0c9f54a8bcf1ec)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   Mat            B       = *newmat;
32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
334a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
34c1d5218aSRichard Tran Mills #endif
354a2a386eSRichard Tran Mills 
364a2a386eSRichard Tran Mills   PetscFunctionBegin;
379566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
384a2a386eSRichard Tran Mills 
394a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4054871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
414a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4354871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
44ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4554871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
524f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
534a2a386eSRichard Tran Mills 
549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL));
554222ddf1SHong Zhang 
56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
574abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
59e9c94282SRichard Tran Mills    * the spptr pointer. */
60a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61a8327b06SKarl Rupp 
625f80ce2aSJacob Faibussowitsch   if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
649566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
654a2a386eSRichard Tran Mills 
664a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
684a2a386eSRichard Tran Mills 
694a2a386eSRichard Tran Mills   *newmat = B;
704a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
714a2a386eSRichard Tran Mills }
724a2a386eSRichard Tran Mills 
734a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
744a2a386eSRichard Tran Mills {
754a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
764a2a386eSRichard Tran Mills 
774a2a386eSRichard Tran Mills   PetscFunctionBegin;
78e9c94282SRichard Tran Mills 
79edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80e9c94282SRichard Tran Mills   if (aijmkl) {
814a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
835f80ce2aSJacob Faibussowitsch     if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
859566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
86e9c94282SRichard Tran Mills   }
874a2a386eSRichard Tran Mills 
884a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
894a2a386eSRichard Tran Mills    * to destroy everything that remains. */
909566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
914a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
924a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
934a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
949566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
954a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
964a2a386eSRichard Tran Mills }
974a2a386eSRichard Tran Mills 
98190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
995b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1005b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1015b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1025b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1035b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1045b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1056e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1064a2a386eSRichard Tran Mills {
107ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1086e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1096e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1106e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11145fbe478SRichard Tran Mills   PetscFunctionBegin;
1126e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1136e369cd5SRichard Tran Mills #else
114a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
115a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
116a8327b06SKarl Rupp   PetscInt         m,n;
117a8327b06SKarl Rupp   MatScalar        *aa;
118a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1194a2a386eSRichard Tran Mills 
120a8327b06SKarl Rupp   PetscFunctionBegin;
121e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1266e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1274d51fa23SRichard Tran Mills #endif
1286e369cd5SRichard Tran Mills 
1290632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1300632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1310632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1325f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
1330632b357SRichard Tran Mills   }
1348d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1356e369cd5SRichard Tran Mills 
136c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
137df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14058678438SRichard Tran Mills   m = A->rmap->n;
14158678438SRichard Tran Mills   n = A->cmap->n;
142df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
143df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
144df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1451495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1468d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1478d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
1485f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
1495f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
1505f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
1511950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
1525f80ce2aSJacob Faibussowitsch       PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
1531950a7e7SRichard Tran Mills     }
1544abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
1559566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
156190ae7a4SRichard Tran Mills   } else {
157190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
158c9d46305SRichard Tran Mills   }
1596e369cd5SRichard Tran Mills 
1606e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
161d995685eSRichard Tran Mills #endif
1626e369cd5SRichard Tran Mills }
1636e369cd5SRichard Tran Mills 
164b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
165190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
166190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
16719afcda9SRichard Tran Mills {
16819afcda9SRichard Tran Mills   sparse_index_base_t indexing;
169190ae7a4SRichard Tran Mills   PetscInt            m,n;
17045fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
17119afcda9SRichard Tran Mills   MatScalar           *aa;
17219afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
17319afcda9SRichard Tran Mills 
174362febeeSStefano Zampini   PetscFunctionBegin;
175190ae7a4SRichard Tran Mills   if (csrA) {
17645fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
1775f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
1785f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
179190ae7a4SRichard Tran Mills   } else {
180190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
181190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
182aab60f1bSRichard Tran Mills   }
183190ae7a4SRichard Tran Mills 
1849566063dSJacob Faibussowitsch   PetscCall(MatSetType(A,MATSEQAIJ));
1859566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols));
186aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
187aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
188aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
189aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
190190ae7a4SRichard Tran Mills   if (csrA) {
1919566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL));
192190ae7a4SRichard Tran Mills   } else {
193190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1949566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1959566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1969566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
197190ae7a4SRichard Tran Mills   }
19819afcda9SRichard Tran Mills 
19919afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
20019afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
2019566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
2026c87cf42SRichard Tran Mills 
20319afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
20419afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2056c87cf42SRichard Tran Mills 
20619afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20719afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20819afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
209f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
210f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
211f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
212190ae7a4SRichard Tran Mills   if (csrA) {
2135f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
2145f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
2151950a7e7SRichard Tran Mills   }
2169566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
21719afcda9SRichard Tran Mills   PetscFunctionReturn(0);
21819afcda9SRichard Tran Mills }
219b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
220190ae7a4SRichard Tran Mills 
221e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
222e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
223e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
224b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
225190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
226e8be1fc7SRichard Tran Mills {
227e8be1fc7SRichard Tran Mills   PetscInt            i;
228e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
229e8be1fc7SRichard Tran Mills   PetscInt            nz;
230e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
231e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
2321495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
233e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
234e8be1fc7SRichard Tran Mills 
235362febeeSStefano Zampini   PetscFunctionBegin;
236190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
237190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
238190ae7a4SRichard Tran Mills 
239e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
2405f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
241e8be1fc7SRichard Tran Mills 
242e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
243e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
244e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
245e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
2469566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES));
247e8be1fc7SRichard Tran Mills   }
248e8be1fc7SRichard Tran Mills 
2499566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
2509566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
251e8be1fc7SRichard Tran Mills 
2529566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
253a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
254a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
255a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
256a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
257e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
258e8be1fc7SRichard Tran Mills }
259b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
260e8be1fc7SRichard Tran Mills 
2613849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2623849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
2633849ddb2SRichard Tran Mills {
2643849ddb2SRichard Tran Mills   PetscInt            i,j,k;
2653849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
2663849ddb2SRichard Tran Mills   PetscInt            nz;
2673849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
2683849ddb2SRichard Tran Mills   PetscScalar         *aa;
2691495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
2703849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2713849ddb2SRichard Tran Mills 
272362febeeSStefano Zampini   PetscFunctionBegin;
2739566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2743849ddb2SRichard Tran Mills 
2753849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2763849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2779566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n"));
2783849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
2793849ddb2SRichard Tran Mills   }
2803849ddb2SRichard Tran Mills 
2813849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
2825f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
2833849ddb2SRichard Tran Mills 
2843849ddb2SRichard Tran Mills   k = 0;
2853849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
2869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i));
2873849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
2883849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
2893849ddb2SRichard Tran Mills       if (aa) {
2909566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],PetscRealPart(aa[k])));
2913849ddb2SRichard Tran Mills       } else {
2929566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]));
2933849ddb2SRichard Tran Mills       }
2943849ddb2SRichard Tran Mills       k++;
2953849ddb2SRichard Tran Mills     }
2969566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
2973849ddb2SRichard Tran Mills   }
2983849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
2993849ddb2SRichard Tran Mills }
3003849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3013849ddb2SRichard Tran Mills 
3026e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3036e369cd5SRichard Tran Mills {
3041495fedeSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3056e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3066e369cd5SRichard Tran Mills 
3076e369cd5SRichard Tran Mills   PetscFunctionBegin;
3089566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A,op,M));
3096e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
3109566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1));
3116e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3125b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3139566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3145b49642aSRichard Tran Mills   }
3156e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3166e369cd5SRichard Tran Mills }
3176e369cd5SRichard Tran Mills 
3186e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3196e369cd5SRichard Tran Mills {
3206e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3215b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3226e369cd5SRichard Tran Mills 
3236e369cd5SRichard Tran Mills   PetscFunctionBegin;
3246e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3256e369cd5SRichard Tran Mills 
3266e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3276e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3286e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3296e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
330d96e85feSRichard Tran Mills    * a lot of code duplication. */
3316e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3329566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3336e369cd5SRichard Tran Mills 
3345b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3355b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3365b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3375b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3389566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3395b49642aSRichard Tran Mills   }
340df555b71SRichard Tran Mills 
3414a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3424a2a386eSRichard Tran Mills }
3434a2a386eSRichard Tran Mills 
344e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3454a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3464a2a386eSRichard Tran Mills {
3474a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3484a2a386eSRichard Tran Mills   const PetscScalar *x;
3494a2a386eSRichard Tran Mills   PetscScalar       *y;
3504a2a386eSRichard Tran Mills   const MatScalar   *aa;
3514a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
352db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
353db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
354db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3554a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
356db63039fSRichard Tran Mills   char              matdescra[6];
357db63039fSRichard Tran Mills 
3584a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
359ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
360ff03dc53SRichard Tran Mills 
361ff03dc53SRichard Tran Mills   PetscFunctionBegin;
362db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
363db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
3649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
366ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
367ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
368ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
369ff03dc53SRichard Tran Mills 
370ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
371db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
372ff03dc53SRichard Tran Mills 
3739566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
3749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
376ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
377ff03dc53SRichard Tran Mills }
3781950a7e7SRichard Tran Mills #endif
379ff03dc53SRichard Tran Mills 
380ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
381df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
382df555b71SRichard Tran Mills {
383df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
384df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
385df555b71SRichard Tran Mills   const PetscScalar *x;
386df555b71SRichard Tran Mills   PetscScalar       *y;
387551aa5c8SRichard Tran Mills   PetscObjectState  state;
388df555b71SRichard Tran Mills 
389df555b71SRichard Tran Mills   PetscFunctionBegin;
390df555b71SRichard Tran Mills 
39138987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
39238987b35SRichard Tran Mills   if (!a->nz) {
3939566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy,&y));
3949566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y,A->rmap->n));
3959566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy,&y));
39638987b35SRichard Tran Mills     PetscFunctionReturn(0);
39738987b35SRichard Tran Mills   }
398f36dfe3fSRichard Tran Mills 
3999566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4009566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
401df555b71SRichard Tran Mills 
4023fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4033fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4043fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4059566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
4069566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4073fa15762SRichard Tran Mills 
408df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
4095f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
410df555b71SRichard Tran Mills 
4119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
414df555b71SRichard Tran Mills   PetscFunctionReturn(0);
415df555b71SRichard Tran Mills }
416d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
417df555b71SRichard Tran Mills 
418e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
419ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
420ff03dc53SRichard Tran Mills {
421ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
422ff03dc53SRichard Tran Mills   const PetscScalar *x;
423ff03dc53SRichard Tran Mills   PetscScalar       *y;
424ff03dc53SRichard Tran Mills   const MatScalar   *aa;
425ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
426db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
427db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
428db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
429ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
430db63039fSRichard Tran Mills   char              matdescra[6];
431ff03dc53SRichard Tran Mills 
432ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
433ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4344a2a386eSRichard Tran Mills 
4354a2a386eSRichard Tran Mills   PetscFunctionBegin;
436969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
437969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4389566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4399566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
4404a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4414a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4424a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4434a2a386eSRichard Tran Mills 
4444a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
445db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4464a2a386eSRichard Tran Mills 
4479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
4504a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4514a2a386eSRichard Tran Mills }
4521950a7e7SRichard Tran Mills #endif
4534a2a386eSRichard Tran Mills 
454ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
455df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
456df555b71SRichard Tran Mills {
457df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
458df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
459df555b71SRichard Tran Mills   const PetscScalar *x;
460df555b71SRichard Tran Mills   PetscScalar       *y;
461551aa5c8SRichard Tran Mills   PetscObjectState  state;
462df555b71SRichard Tran Mills 
463df555b71SRichard Tran Mills   PetscFunctionBegin;
464df555b71SRichard Tran Mills 
46538987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
46638987b35SRichard Tran Mills   if (!a->nz) {
4679566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy,&y));
4689566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y,A->cmap->n));
4699566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy,&y));
47038987b35SRichard Tran Mills     PetscFunctionReturn(0);
47138987b35SRichard Tran Mills   }
472f36dfe3fSRichard Tran Mills 
4739566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4749566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
475df555b71SRichard Tran Mills 
4763fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4773fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4783fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4799566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
4809566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4813fa15762SRichard Tran Mills 
482df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
4835f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
484df555b71SRichard Tran Mills 
4859566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
488df555b71SRichard Tran Mills   PetscFunctionReturn(0);
489df555b71SRichard Tran Mills }
490d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
491df555b71SRichard Tran Mills 
492e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
4934a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4944a2a386eSRichard Tran Mills {
4954a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
4964a2a386eSRichard Tran Mills   const PetscScalar *x;
4974a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
4984a2a386eSRichard Tran Mills   const MatScalar   *aa;
4994a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
500db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5014a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5024a2a386eSRichard Tran Mills   PetscInt          i;
5034a2a386eSRichard Tran Mills 
504ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
505ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
506a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
507db63039fSRichard Tran Mills   PetscScalar       beta;
508a84739b8SRichard Tran Mills   char              matdescra[6];
509ff03dc53SRichard Tran Mills 
510ff03dc53SRichard Tran Mills   PetscFunctionBegin;
511a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
512a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
513a84739b8SRichard Tran Mills 
5149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
516ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
517ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
518ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
519ff03dc53SRichard Tran Mills 
520ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
521a84739b8SRichard Tran Mills   if (zz == yy) {
522a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
523db63039fSRichard Tran Mills     beta = 1.0;
524db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
525a84739b8SRichard Tran Mills   } else {
526db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
527db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
528db63039fSRichard Tran Mills     beta = 0.0;
529db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
530ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
531ff03dc53SRichard Tran Mills       z[i] += y[i];
532ff03dc53SRichard Tran Mills     }
533a84739b8SRichard Tran Mills   }
534ff03dc53SRichard Tran Mills 
5359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
5369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
538ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
539ff03dc53SRichard Tran Mills }
5401950a7e7SRichard Tran Mills #endif
541ff03dc53SRichard Tran Mills 
542ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
543df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
544df555b71SRichard Tran Mills {
545df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
546df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
547df555b71SRichard Tran Mills   const PetscScalar *x;
548df555b71SRichard Tran Mills   PetscScalar       *y,*z;
549df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
550df555b71SRichard Tran Mills   PetscInt          i;
551df555b71SRichard Tran Mills 
552df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
553551aa5c8SRichard Tran Mills   PetscObjectState  state;
554df555b71SRichard Tran Mills 
555df555b71SRichard Tran Mills   PetscFunctionBegin;
556df555b71SRichard Tran Mills 
55738987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
55838987b35SRichard Tran Mills   if (!a->nz) {
5599566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
5609566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z,y,m));
5619566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
56238987b35SRichard Tran Mills     PetscFunctionReturn(0);
56338987b35SRichard Tran Mills   }
564df555b71SRichard Tran Mills 
5659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
567df555b71SRichard Tran Mills 
5683fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5693fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5703fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5719566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
5729566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5733fa15762SRichard Tran Mills 
574df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
575df555b71SRichard Tran Mills   if (zz == yy) {
576df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
577df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
5785f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
579df555b71SRichard Tran Mills   } else {
580df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
581df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
5825f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
5835f80ce2aSJacob Faibussowitsch     for (i=0; i<m; i++) z[i] += y[i];
584df555b71SRichard Tran Mills   }
585df555b71SRichard Tran Mills 
5869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
5879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
589df555b71SRichard Tran Mills   PetscFunctionReturn(0);
590df555b71SRichard Tran Mills }
591d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
592df555b71SRichard Tran Mills 
593e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
594ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
595ff03dc53SRichard Tran Mills {
596ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
597ff03dc53SRichard Tran Mills   const PetscScalar *x;
598ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
599ff03dc53SRichard Tran Mills   const MatScalar   *aa;
600ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
601db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
602ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
603ff03dc53SRichard Tran Mills   PetscInt          i;
604ff03dc53SRichard Tran Mills 
605ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
606ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
607a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
608db63039fSRichard Tran Mills   PetscScalar       beta;
609a84739b8SRichard Tran Mills   char              matdescra[6];
6104a2a386eSRichard Tran Mills 
6114a2a386eSRichard Tran Mills   PetscFunctionBegin;
612a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
613a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
614a84739b8SRichard Tran Mills 
6159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
6174a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6184a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6194a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6204a2a386eSRichard Tran Mills 
6214a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
622a84739b8SRichard Tran Mills   if (zz == yy) {
623a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
624db63039fSRichard Tran Mills     beta = 1.0;
625969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
626a84739b8SRichard Tran Mills   } else {
627db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
628db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
629db63039fSRichard Tran Mills     beta = 0.0;
630db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
631969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6324a2a386eSRichard Tran Mills       z[i] += y[i];
6334a2a386eSRichard Tran Mills     }
634a84739b8SRichard Tran Mills   }
6354a2a386eSRichard Tran Mills 
6369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
6379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
6394a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6404a2a386eSRichard Tran Mills }
6411950a7e7SRichard Tran Mills #endif
6424a2a386eSRichard Tran Mills 
643ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
644df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
645df555b71SRichard Tran Mills {
646df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
647df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
648df555b71SRichard Tran Mills   const PetscScalar *x;
649df555b71SRichard Tran Mills   PetscScalar       *y,*z;
650969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
651df555b71SRichard Tran Mills   PetscInt          i;
652551aa5c8SRichard Tran Mills   PetscObjectState  state;
653df555b71SRichard Tran Mills 
654df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
655df555b71SRichard Tran Mills 
656df555b71SRichard Tran Mills   PetscFunctionBegin;
657df555b71SRichard Tran Mills 
65838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
65938987b35SRichard Tran Mills   if (!a->nz) {
6609566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
6619566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z,y,n));
6629566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
66338987b35SRichard Tran Mills     PetscFunctionReturn(0);
66438987b35SRichard Tran Mills   }
665f36dfe3fSRichard Tran Mills 
6669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
668df555b71SRichard Tran Mills 
6693fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6703fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6713fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6729566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
6735f80ce2aSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
6743fa15762SRichard Tran Mills 
675df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
676df555b71SRichard Tran Mills   if (zz == yy) {
677df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
678df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
6795f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
680df555b71SRichard Tran Mills   } else {
681df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
682df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
6835f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
6845f80ce2aSJacob Faibussowitsch     for (i=0; i<n; i++) z[i] += y[i];
685df555b71SRichard Tran Mills   }
686df555b71SRichard Tran Mills 
6879566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
6889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
690df555b71SRichard Tran Mills   PetscFunctionReturn(0);
691df555b71SRichard Tran Mills }
692d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
693df555b71SRichard Tran Mills 
694190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
6958a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
696190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
697431879ecSRichard Tran Mills {
6981495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
699431879ecSRichard Tran Mills   sparse_matrix_t     csrA,csrB,csrC;
700190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
701431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
702431879ecSRichard Tran Mills   PetscObjectState    state;
703431879ecSRichard Tran Mills 
704431879ecSRichard Tran Mills   PetscFunctionBegin;
705190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
706190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
707190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
708190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
709190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
710190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
711190ae7a4SRichard Tran Mills 
7129566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
7139566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7149566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
7159566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
716431879ecSRichard Tran Mills   csrA = a->csrA;
717431879ecSRichard Tran Mills   csrB = b->csrA;
718431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
719431879ecSRichard Tran Mills 
720190ae7a4SRichard Tran Mills   if (csrA && csrB) {
7215f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
722190ae7a4SRichard Tran Mills   } else {
723190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
724190ae7a4SRichard Tran Mills   }
725190ae7a4SRichard Tran Mills 
7269566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C));
727431879ecSRichard Tran Mills 
728431879ecSRichard Tran Mills   PetscFunctionReturn(0);
729431879ecSRichard Tran Mills }
730431879ecSRichard Tran Mills 
731190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
732e8be1fc7SRichard Tran Mills {
7331495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
734e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
735e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
736e8be1fc7SRichard Tran Mills   PetscObjectState    state;
737e8be1fc7SRichard Tran Mills 
738e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7399566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
7409566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7419566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
7429566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
743e8be1fc7SRichard Tran Mills   csrA = a->csrA;
744e8be1fc7SRichard Tran Mills   csrB = b->csrA;
745e8be1fc7SRichard Tran Mills   csrC = c->csrA;
746e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
747e8be1fc7SRichard Tran Mills 
748190ae7a4SRichard Tran Mills   if (csrA && csrB) {
7495f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
750190ae7a4SRichard Tran Mills   } else {
751190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
752190ae7a4SRichard Tran Mills   }
753e8be1fc7SRichard Tran Mills 
754e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
756e8be1fc7SRichard Tran Mills 
757e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
758e8be1fc7SRichard Tran Mills }
759e8be1fc7SRichard Tran Mills 
760190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
7614f53af40SRichard Tran Mills {
762190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7639566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
764190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
765190ae7a4SRichard Tran Mills }
766190ae7a4SRichard Tran Mills 
767190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
768190ae7a4SRichard Tran Mills {
769190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7709566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
771190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
772190ae7a4SRichard Tran Mills }
773190ae7a4SRichard Tran Mills 
774190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
775190ae7a4SRichard Tran Mills {
776190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7779566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
778190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
779190ae7a4SRichard Tran Mills }
780190ae7a4SRichard Tran Mills 
781190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
782190ae7a4SRichard Tran Mills {
783190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7849566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
785190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
786190ae7a4SRichard Tran Mills }
787190ae7a4SRichard Tran Mills 
788190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
789190ae7a4SRichard Tran Mills {
790190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7919566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
792190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
793190ae7a4SRichard Tran Mills }
794190ae7a4SRichard Tran Mills 
795190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
796190ae7a4SRichard Tran Mills {
797190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7989566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
799190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
800190ae7a4SRichard Tran Mills }
801190ae7a4SRichard Tran Mills 
802190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
803190ae7a4SRichard Tran Mills {
804190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
805190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
806190ae7a4SRichard Tran Mills 
807190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8089566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C));
809190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
810190ae7a4SRichard Tran Mills }
811190ae7a4SRichard Tran Mills 
812190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
813190ae7a4SRichard Tran Mills {
814190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
815190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
816190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
817190ae7a4SRichard Tran Mills 
818190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8199566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C));
820190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
821190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
822190ae7a4SRichard Tran Mills }
823190ae7a4SRichard Tran Mills 
82449ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
825190ae7a4SRichard Tran Mills {
826190ae7a4SRichard Tran Mills   Mat                 Ct;
827190ae7a4SRichard Tran Mills   Vec                 zeros;
8281495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
8294f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8304f53af40SRichard Tran Mills   PetscBool           set, flag;
831b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8324f53af40SRichard Tran Mills   PetscObjectState    state;
8334f53af40SRichard Tran Mills 
8344f53af40SRichard Tran Mills   PetscFunctionBegin;
8359566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A,&set,&flag));
836*08401ef6SPierre Jolivet   PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8374f53af40SRichard Tran Mills 
8389566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
8399566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8409566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
8419566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8424f53af40SRichard Tran Mills   csrA = a->csrA;
8434f53af40SRichard Tran Mills   csrP = p->csrA;
8444f53af40SRichard Tran Mills   csrC = c->csrA;
845b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
846190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
847b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8484f53af40SRichard Tran Mills 
849f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
8505f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
8514f53af40SRichard Tran Mills 
852190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
853190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
854190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8557301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
856190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
857190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
858190ae7a4SRichard Tran Mills    * the full matrix. */
8599566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8609566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct));
8619566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C,&zeros,NULL));
8629566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8639566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8649566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES));
8659566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN));
866190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
8679566063dSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C));
8689566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
8699566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8709566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8719566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8724f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8734f53af40SRichard Tran Mills }
874190ae7a4SRichard Tran Mills 
875190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
876190ae7a4SRichard Tran Mills {
877190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
878190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
8791495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
880190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
881190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
882190ae7a4SRichard Tran Mills   PetscObjectState    state;
883190ae7a4SRichard Tran Mills 
884190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8859566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
8869566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8879566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
8889566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
889190ae7a4SRichard Tran Mills   csrA = a->csrA;
890190ae7a4SRichard Tran Mills   csrP = p->csrA;
891190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
892190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
893190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
894190ae7a4SRichard Tran Mills 
895190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
896190ae7a4SRichard Tran Mills   if (csrP && csrA) {
8975f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
898190ae7a4SRichard Tran Mills   } else {
899190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
900190ae7a4SRichard Tran Mills   }
901190ae7a4SRichard Tran Mills 
902190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
903190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
90449ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
90549ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
90649ba5396SRichard Tran Mills    * is guaranteed. */
9079566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C));
908190ae7a4SRichard Tran Mills 
909190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
910190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
911190ae7a4SRichard Tran Mills }
912190ae7a4SRichard Tran Mills 
913190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
914190ae7a4SRichard Tran Mills {
915190ae7a4SRichard Tran Mills   PetscFunctionBegin;
916190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
917190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
918190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
919190ae7a4SRichard Tran Mills }
920190ae7a4SRichard Tran Mills 
921190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
922190ae7a4SRichard Tran Mills {
923190ae7a4SRichard Tran Mills   PetscFunctionBegin;
924190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
925190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
926190ae7a4SRichard Tran Mills }
927190ae7a4SRichard Tran Mills 
928190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
929190ae7a4SRichard Tran Mills {
930190ae7a4SRichard Tran Mills   PetscFunctionBegin;
931190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
932190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
933190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
934190ae7a4SRichard Tran Mills }
935190ae7a4SRichard Tran Mills 
936190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
937190ae7a4SRichard Tran Mills {
938190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
939190ae7a4SRichard Tran Mills   Mat            A = product->A;
940190ae7a4SRichard Tran Mills   PetscBool      set, flag;
941190ae7a4SRichard Tran Mills 
942190ae7a4SRichard Tran Mills   PetscFunctionBegin;
943a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9442ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9452ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
946190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9472ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
948190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
949a3d67537SPierre Jolivet   } else {
950190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9519566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A,&set,&flag));
952a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
953a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9541495fedeSRichard Tran Mills     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
955190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
956a3d67537SPierre Jolivet   }
957190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
958190ae7a4SRichard Tran Mills }
959190ae7a4SRichard Tran Mills 
960190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
961190ae7a4SRichard Tran Mills {
962190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9632ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
964190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
965190ae7a4SRichard Tran Mills }
966190ae7a4SRichard Tran Mills 
967190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
968190ae7a4SRichard Tran Mills {
969190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9702ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
971190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
972190ae7a4SRichard Tran Mills }
973190ae7a4SRichard Tran Mills 
974190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
975190ae7a4SRichard Tran Mills {
976190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
977190ae7a4SRichard Tran Mills 
978190ae7a4SRichard Tran Mills   PetscFunctionBegin;
979190ae7a4SRichard Tran Mills   switch (product->type) {
980190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
9819566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
982190ae7a4SRichard Tran Mills     break;
983190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
9849566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
985190ae7a4SRichard Tran Mills     break;
986190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
9879566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
988190ae7a4SRichard Tran Mills     break;
989190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
9909566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
991190ae7a4SRichard Tran Mills     break;
992190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
9939566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
994190ae7a4SRichard Tran Mills     break;
995190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
9969566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
997190ae7a4SRichard Tran Mills     break;
998190ae7a4SRichard Tran Mills   default:
999190ae7a4SRichard Tran Mills     break;
1000190ae7a4SRichard Tran Mills   }
1001190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1002190ae7a4SRichard Tran Mills }
1003431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1004190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
10054f53af40SRichard Tran Mills 
10064a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1007510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
10084a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
10094a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
10104a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
10114a2a386eSRichard Tran Mills {
10124a2a386eSRichard Tran Mills   Mat            B = *newmat;
10134a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1014c9d46305SRichard Tran Mills   PetscBool      set;
1015e9c94282SRichard Tran Mills   PetscBool      sametype;
10165f80ce2aSJacob Faibussowitsch   PetscErrorCode ierr;
10174a2a386eSRichard Tran Mills 
10184a2a386eSRichard Tran Mills   PetscFunctionBegin;
10199566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
10204a2a386eSRichard Tran Mills 
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1022e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1023e9c94282SRichard Tran Mills 
10249566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B,&aijmkl));
10254a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
10264a2a386eSRichard Tran Mills 
1027df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1028969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
10294a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
10304a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
10314a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1032c9d46305SRichard Tran Mills 
10334abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1036a8327b06SKarl Rupp #else
1037d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1038d995685eSRichard Tran Mills #endif
10395b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10404abfa3b3SRichard Tran Mills 
10414abfa3b3SRichard Tran Mills   /* Parse command line options. */
10429566063dSJacob Faibussowitsch   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");PetscCall(ierr);
10439566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set));
10449566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set));
10459566063dSJacob Faibussowitsch   ierr = PetscOptionsEnd();PetscCall(ierr);
1046ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1047d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10489566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1049d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1050d995685eSRichard Tran Mills   }
1051d995685eSRichard Tran Mills #endif
1052c9d46305SRichard Tran Mills 
1053ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1054df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1055969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1056df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1057969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10588a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1059190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1060190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1061190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1062190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1063190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1064ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
106549ba5396SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1066190ae7a4SRichard Tran Mills #   else
1067190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
10684f53af40SRichard Tran Mills #   endif
1069e8be1fc7SRichard Tran Mills # endif
10701950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10711950a7e7SRichard Tran Mills 
1072213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1073213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1074213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1075213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1076213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10771950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10784a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1079969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10804a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1081969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1082c9d46305SRichard Tran Mills   }
10831950a7e7SRichard Tran Mills #endif
10844a2a386eSRichard Tran Mills 
10859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ));
10864a2a386eSRichard Tran Mills 
10879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL));
10884a2a386eSRichard Tran Mills   *newmat = B;
10894a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10904a2a386eSRichard Tran Mills }
10914a2a386eSRichard Tran Mills 
10924a2a386eSRichard Tran Mills /*@C
10934a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
10944a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
10954a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
109690147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
109790147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1098597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1099597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1100597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
110190147e49SRichard Tran Mills 
1102d083f849SBarry Smith    Collective
11034a2a386eSRichard Tran Mills 
11044a2a386eSRichard Tran Mills    Input Parameters:
11054a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
11064a2a386eSRichard Tran Mills .  m - number of rows
11074a2a386eSRichard Tran Mills .  n - number of columns
11084a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
11094a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
11104a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
11114a2a386eSRichard Tran Mills 
11124a2a386eSRichard Tran Mills    Output Parameter:
11134a2a386eSRichard Tran Mills .  A - the matrix
11144a2a386eSRichard Tran Mills 
111590147e49SRichard Tran Mills    Options Database Keys:
111666b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
111766b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
111890147e49SRichard Tran Mills 
11194a2a386eSRichard Tran Mills    Notes:
11204a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
11214a2a386eSRichard Tran Mills 
11224a2a386eSRichard Tran Mills    Level: intermediate
11234a2a386eSRichard Tran Mills 
11244a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
11254a2a386eSRichard Tran Mills @*/
11264a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
11274a2a386eSRichard Tran Mills {
11284a2a386eSRichard Tran Mills   PetscFunctionBegin;
11299566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,A));
11309566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A,m,n,m,n));
11319566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A,MATSEQAIJMKL));
11329566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
11334a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11344a2a386eSRichard Tran Mills }
11354a2a386eSRichard Tran Mills 
11364a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
11374a2a386eSRichard Tran Mills {
11384a2a386eSRichard Tran Mills   PetscFunctionBegin;
11399566063dSJacob Faibussowitsch   PetscCall(MatSetType(A,MATSEQAIJ));
11409566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
11414a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11424a2a386eSRichard Tran Mills }
1143