xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 2e956fe4fc852fabc23b437482e1fb7b77fddb0d)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   Mat            B       = *newmat;
32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
334a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
34c1d5218aSRichard Tran Mills #endif
354a2a386eSRichard Tran Mills 
364a2a386eSRichard Tran Mills   PetscFunctionBegin;
379566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
384a2a386eSRichard Tran Mills 
394a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4054871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
414a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4354871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
44ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4554871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
524f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
534a2a386eSRichard Tran Mills 
549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL));
554222ddf1SHong Zhang 
56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
574abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
59e9c94282SRichard Tran Mills    * the spptr pointer. */
60a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61a8327b06SKarl Rupp 
625f80ce2aSJacob Faibussowitsch   if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
649566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
654a2a386eSRichard Tran Mills 
664a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
684a2a386eSRichard Tran Mills 
694a2a386eSRichard Tran Mills   *newmat = B;
704a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
714a2a386eSRichard Tran Mills }
724a2a386eSRichard Tran Mills 
734a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
744a2a386eSRichard Tran Mills {
754a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
764a2a386eSRichard Tran Mills 
774a2a386eSRichard Tran Mills   PetscFunctionBegin;
78e9c94282SRichard Tran Mills 
79edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80e9c94282SRichard Tran Mills   if (aijmkl) {
814a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
835f80ce2aSJacob Faibussowitsch     if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
859566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
86e9c94282SRichard Tran Mills   }
874a2a386eSRichard Tran Mills 
884a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
894a2a386eSRichard Tran Mills    * to destroy everything that remains. */
909566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
914a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
924a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
934a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
94*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL));
959566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
964a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
974a2a386eSRichard Tran Mills }
984a2a386eSRichard Tran Mills 
99190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1005b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1015b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1025b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1035b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1045b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1055b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1066e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1074a2a386eSRichard Tran Mills {
108ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1096e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1106e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1116e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11245fbe478SRichard Tran Mills   PetscFunctionBegin;
1136e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1146e369cd5SRichard Tran Mills #else
115a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
116a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
117a8327b06SKarl Rupp   PetscInt         m,n;
118a8327b06SKarl Rupp   MatScalar        *aa;
119a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1204a2a386eSRichard Tran Mills 
121a8327b06SKarl Rupp   PetscFunctionBegin;
122e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
123e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
124e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
125e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
126e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1276e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1284d51fa23SRichard Tran Mills #endif
1296e369cd5SRichard Tran Mills 
1300632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1310632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1320632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1335f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
1340632b357SRichard Tran Mills   }
1358d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1366e369cd5SRichard Tran Mills 
137c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
138df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
139df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
140df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14158678438SRichard Tran Mills   m = A->rmap->n;
14258678438SRichard Tran Mills   n = A->cmap->n;
143df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
144df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
145df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1461495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1478d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1488d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
1495f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
1505f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
1515f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
1521950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
1535f80ce2aSJacob Faibussowitsch       PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
1541950a7e7SRichard Tran Mills     }
1554abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
1569566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
157190ae7a4SRichard Tran Mills   } else {
158190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
159c9d46305SRichard Tran Mills   }
1606e369cd5SRichard Tran Mills 
1616e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
162d995685eSRichard Tran Mills #endif
1636e369cd5SRichard Tran Mills }
1646e369cd5SRichard Tran Mills 
165b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
166190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
167190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
16819afcda9SRichard Tran Mills {
16919afcda9SRichard Tran Mills   sparse_index_base_t indexing;
170190ae7a4SRichard Tran Mills   PetscInt            m,n;
17145fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
17219afcda9SRichard Tran Mills   MatScalar           *aa;
17319afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
17419afcda9SRichard Tran Mills 
175362febeeSStefano Zampini   PetscFunctionBegin;
176190ae7a4SRichard Tran Mills   if (csrA) {
17745fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
1785f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
1795f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
180190ae7a4SRichard Tran Mills   } else {
181190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
182190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
183aab60f1bSRichard Tran Mills   }
184190ae7a4SRichard Tran Mills 
1859566063dSJacob Faibussowitsch   PetscCall(MatSetType(A,MATSEQAIJ));
1869566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols));
187aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
188aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
189aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
190aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
191190ae7a4SRichard Tran Mills   if (csrA) {
1929566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL));
193190ae7a4SRichard Tran Mills   } else {
194190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1959566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1969566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1979566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
198190ae7a4SRichard Tran Mills   }
19919afcda9SRichard Tran Mills 
20019afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
20119afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
2029566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
2036c87cf42SRichard Tran Mills 
20419afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
20519afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2066c87cf42SRichard Tran Mills 
20719afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20819afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20919afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
210f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
211f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
212f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
213190ae7a4SRichard Tran Mills   if (csrA) {
2145f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
2155f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
2161950a7e7SRichard Tran Mills   }
2179566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
21819afcda9SRichard Tran Mills   PetscFunctionReturn(0);
21919afcda9SRichard Tran Mills }
220b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
221190ae7a4SRichard Tran Mills 
222e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
223e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
224e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
225b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
226190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
227e8be1fc7SRichard Tran Mills {
228e8be1fc7SRichard Tran Mills   PetscInt            i;
229e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
230e8be1fc7SRichard Tran Mills   PetscInt            nz;
231e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
232e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
2331495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
234e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
235e8be1fc7SRichard Tran Mills 
236362febeeSStefano Zampini   PetscFunctionBegin;
237190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
238190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
239190ae7a4SRichard Tran Mills 
240e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
2415f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
242e8be1fc7SRichard Tran Mills 
243e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
244e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
245e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
246e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
2479566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES));
248e8be1fc7SRichard Tran Mills   }
249e8be1fc7SRichard Tran Mills 
2509566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
2519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
252e8be1fc7SRichard Tran Mills 
2539566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
254a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
255a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
256a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
257a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
258e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
259e8be1fc7SRichard Tran Mills }
260b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
261e8be1fc7SRichard Tran Mills 
2623849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2633849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
2643849ddb2SRichard Tran Mills {
2653849ddb2SRichard Tran Mills   PetscInt            i,j,k;
2663849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
2673849ddb2SRichard Tran Mills   PetscInt            nz;
2683849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
2693849ddb2SRichard Tran Mills   PetscScalar         *aa;
2701495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
2713849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2723849ddb2SRichard Tran Mills 
273362febeeSStefano Zampini   PetscFunctionBegin;
2749566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2753849ddb2SRichard Tran Mills 
2763849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2773849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2789566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n"));
2793849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
2803849ddb2SRichard Tran Mills   }
2813849ddb2SRichard Tran Mills 
2823849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
2835f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
2843849ddb2SRichard Tran Mills 
2853849ddb2SRichard Tran Mills   k = 0;
2863849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
2879566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i));
2883849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
2893849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
2903849ddb2SRichard Tran Mills       if (aa) {
2919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],PetscRealPart(aa[k])));
2923849ddb2SRichard Tran Mills       } else {
2939566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]));
2943849ddb2SRichard Tran Mills       }
2953849ddb2SRichard Tran Mills       k++;
2963849ddb2SRichard Tran Mills     }
2979566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
2983849ddb2SRichard Tran Mills   }
2993849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
3003849ddb2SRichard Tran Mills }
3013849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3023849ddb2SRichard Tran Mills 
3036e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3046e369cd5SRichard Tran Mills {
3051495fedeSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3066e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3076e369cd5SRichard Tran Mills 
3086e369cd5SRichard Tran Mills   PetscFunctionBegin;
3099566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A,op,M));
3106e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
3119566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1));
3126e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3135b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3149566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3155b49642aSRichard Tran Mills   }
3166e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3176e369cd5SRichard Tran Mills }
3186e369cd5SRichard Tran Mills 
3196e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3206e369cd5SRichard Tran Mills {
3216e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3225b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3236e369cd5SRichard Tran Mills 
3246e369cd5SRichard Tran Mills   PetscFunctionBegin;
3256e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3266e369cd5SRichard Tran Mills 
3276e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3286e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3296e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3306e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
331d96e85feSRichard Tran Mills    * a lot of code duplication. */
3326e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3339566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3346e369cd5SRichard Tran Mills 
3355b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3365b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3375b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3385b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3399566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3405b49642aSRichard Tran Mills   }
341df555b71SRichard Tran Mills 
3424a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3434a2a386eSRichard Tran Mills }
3444a2a386eSRichard Tran Mills 
345e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3464a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3474a2a386eSRichard Tran Mills {
3484a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3494a2a386eSRichard Tran Mills   const PetscScalar *x;
3504a2a386eSRichard Tran Mills   PetscScalar       *y;
3514a2a386eSRichard Tran Mills   const MatScalar   *aa;
3524a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
353db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
354db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
355db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3564a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
357db63039fSRichard Tran Mills   char              matdescra[6];
358db63039fSRichard Tran Mills 
3594a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
360ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
361ff03dc53SRichard Tran Mills 
362ff03dc53SRichard Tran Mills   PetscFunctionBegin;
363db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
364db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
3659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
367ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
368ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
369ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
370ff03dc53SRichard Tran Mills 
371ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
372db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
373ff03dc53SRichard Tran Mills 
3749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
3759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
377ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
378ff03dc53SRichard Tran Mills }
3791950a7e7SRichard Tran Mills #endif
380ff03dc53SRichard Tran Mills 
381ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
382df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
383df555b71SRichard Tran Mills {
384df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
385df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
386df555b71SRichard Tran Mills   const PetscScalar *x;
387df555b71SRichard Tran Mills   PetscScalar       *y;
388551aa5c8SRichard Tran Mills   PetscObjectState  state;
389df555b71SRichard Tran Mills 
390df555b71SRichard Tran Mills   PetscFunctionBegin;
391df555b71SRichard Tran Mills 
39238987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
39338987b35SRichard Tran Mills   if (!a->nz) {
3949566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy,&y));
3959566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y,A->rmap->n));
3969566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy,&y));
39738987b35SRichard Tran Mills     PetscFunctionReturn(0);
39838987b35SRichard Tran Mills   }
399f36dfe3fSRichard Tran Mills 
4009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4019566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
402df555b71SRichard Tran Mills 
4033fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4043fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4053fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4069566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
4079566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4083fa15762SRichard Tran Mills 
409df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
4105f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
411df555b71SRichard Tran Mills 
4129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
415df555b71SRichard Tran Mills   PetscFunctionReturn(0);
416df555b71SRichard Tran Mills }
417d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
418df555b71SRichard Tran Mills 
419e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
420ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
421ff03dc53SRichard Tran Mills {
422ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
423ff03dc53SRichard Tran Mills   const PetscScalar *x;
424ff03dc53SRichard Tran Mills   PetscScalar       *y;
425ff03dc53SRichard Tran Mills   const MatScalar   *aa;
426ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
427db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
428db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
429db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
430ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
431db63039fSRichard Tran Mills   char              matdescra[6];
432ff03dc53SRichard Tran Mills 
433ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
434ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4354a2a386eSRichard Tran Mills 
4364a2a386eSRichard Tran Mills   PetscFunctionBegin;
437969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
438969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
4414a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4424a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4434a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4444a2a386eSRichard Tran Mills 
4454a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
446db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4474a2a386eSRichard Tran Mills 
4489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
4514a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4524a2a386eSRichard Tran Mills }
4531950a7e7SRichard Tran Mills #endif
4544a2a386eSRichard Tran Mills 
455ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
456df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
457df555b71SRichard Tran Mills {
458df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
459df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
460df555b71SRichard Tran Mills   const PetscScalar *x;
461df555b71SRichard Tran Mills   PetscScalar       *y;
462551aa5c8SRichard Tran Mills   PetscObjectState  state;
463df555b71SRichard Tran Mills 
464df555b71SRichard Tran Mills   PetscFunctionBegin;
465df555b71SRichard Tran Mills 
46638987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
46738987b35SRichard Tran Mills   if (!a->nz) {
4689566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy,&y));
4699566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y,A->cmap->n));
4709566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy,&y));
47138987b35SRichard Tran Mills     PetscFunctionReturn(0);
47238987b35SRichard Tran Mills   }
473f36dfe3fSRichard Tran Mills 
4749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
476df555b71SRichard Tran Mills 
4773fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4783fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4793fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4809566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
4819566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4823fa15762SRichard Tran Mills 
483df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
4845f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
485df555b71SRichard Tran Mills 
4869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
4879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
489df555b71SRichard Tran Mills   PetscFunctionReturn(0);
490df555b71SRichard Tran Mills }
491d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
492df555b71SRichard Tran Mills 
493e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
4944a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4954a2a386eSRichard Tran Mills {
4964a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
4974a2a386eSRichard Tran Mills   const PetscScalar *x;
4984a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
4994a2a386eSRichard Tran Mills   const MatScalar   *aa;
5004a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
501db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5024a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5034a2a386eSRichard Tran Mills   PetscInt          i;
5044a2a386eSRichard Tran Mills 
505ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
506ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
507a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
508db63039fSRichard Tran Mills   PetscScalar       beta;
509a84739b8SRichard Tran Mills   char              matdescra[6];
510ff03dc53SRichard Tran Mills 
511ff03dc53SRichard Tran Mills   PetscFunctionBegin;
512a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
513a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
514a84739b8SRichard Tran Mills 
5159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
517ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
518ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
519ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
520ff03dc53SRichard Tran Mills 
521ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
522a84739b8SRichard Tran Mills   if (zz == yy) {
523a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
524db63039fSRichard Tran Mills     beta = 1.0;
525db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
526a84739b8SRichard Tran Mills   } else {
527db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
528db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
529db63039fSRichard Tran Mills     beta = 0.0;
530db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
531ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
532ff03dc53SRichard Tran Mills       z[i] += y[i];
533ff03dc53SRichard Tran Mills     }
534a84739b8SRichard Tran Mills   }
535ff03dc53SRichard Tran Mills 
5369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
5379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
539ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
540ff03dc53SRichard Tran Mills }
5411950a7e7SRichard Tran Mills #endif
542ff03dc53SRichard Tran Mills 
543ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
544df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
545df555b71SRichard Tran Mills {
546df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
547df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
548df555b71SRichard Tran Mills   const PetscScalar *x;
549df555b71SRichard Tran Mills   PetscScalar       *y,*z;
550df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
551df555b71SRichard Tran Mills   PetscInt          i;
552df555b71SRichard Tran Mills 
553df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
554551aa5c8SRichard Tran Mills   PetscObjectState  state;
555df555b71SRichard Tran Mills 
556df555b71SRichard Tran Mills   PetscFunctionBegin;
557df555b71SRichard Tran Mills 
55838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
55938987b35SRichard Tran Mills   if (!a->nz) {
5609566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
5619566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z,y,m));
5629566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
56338987b35SRichard Tran Mills     PetscFunctionReturn(0);
56438987b35SRichard Tran Mills   }
565df555b71SRichard Tran Mills 
5669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
568df555b71SRichard Tran Mills 
5693fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5703fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5713fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5729566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
5739566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5743fa15762SRichard Tran Mills 
575df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
576df555b71SRichard Tran Mills   if (zz == yy) {
577df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
578df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
5795f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
580df555b71SRichard Tran Mills   } else {
581df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
582df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
5835f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
5845f80ce2aSJacob Faibussowitsch     for (i=0; i<m; i++) z[i] += y[i];
585df555b71SRichard Tran Mills   }
586df555b71SRichard Tran Mills 
5879566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
5889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
590df555b71SRichard Tran Mills   PetscFunctionReturn(0);
591df555b71SRichard Tran Mills }
592d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
593df555b71SRichard Tran Mills 
594e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
595ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
596ff03dc53SRichard Tran Mills {
597ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
598ff03dc53SRichard Tran Mills   const PetscScalar *x;
599ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
600ff03dc53SRichard Tran Mills   const MatScalar   *aa;
601ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
602db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
603ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
604ff03dc53SRichard Tran Mills   PetscInt          i;
605ff03dc53SRichard Tran Mills 
606ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
607ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
608a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
609db63039fSRichard Tran Mills   PetscScalar       beta;
610a84739b8SRichard Tran Mills   char              matdescra[6];
6114a2a386eSRichard Tran Mills 
6124a2a386eSRichard Tran Mills   PetscFunctionBegin;
613a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
614a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
615a84739b8SRichard Tran Mills 
6169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
6184a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6194a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6204a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6214a2a386eSRichard Tran Mills 
6224a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
623a84739b8SRichard Tran Mills   if (zz == yy) {
624a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
625db63039fSRichard Tran Mills     beta = 1.0;
626969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
627a84739b8SRichard Tran Mills   } else {
628db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
629db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
630db63039fSRichard Tran Mills     beta = 0.0;
631db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
632969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6334a2a386eSRichard Tran Mills       z[i] += y[i];
6344a2a386eSRichard Tran Mills     }
635a84739b8SRichard Tran Mills   }
6364a2a386eSRichard Tran Mills 
6379566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
6389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
6404a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6414a2a386eSRichard Tran Mills }
6421950a7e7SRichard Tran Mills #endif
6434a2a386eSRichard Tran Mills 
644ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
645df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
646df555b71SRichard Tran Mills {
647df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
648df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
649df555b71SRichard Tran Mills   const PetscScalar *x;
650df555b71SRichard Tran Mills   PetscScalar       *y,*z;
651969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
652df555b71SRichard Tran Mills   PetscInt          i;
653551aa5c8SRichard Tran Mills   PetscObjectState  state;
654df555b71SRichard Tran Mills 
655df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
656df555b71SRichard Tran Mills 
657df555b71SRichard Tran Mills   PetscFunctionBegin;
658df555b71SRichard Tran Mills 
65938987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
66038987b35SRichard Tran Mills   if (!a->nz) {
6619566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
6629566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z,y,n));
6639566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
66438987b35SRichard Tran Mills     PetscFunctionReturn(0);
66538987b35SRichard Tran Mills   }
666f36dfe3fSRichard Tran Mills 
6679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
669df555b71SRichard Tran Mills 
6703fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6713fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6723fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6739566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
6745f80ce2aSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
6753fa15762SRichard Tran Mills 
676df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
677df555b71SRichard Tran Mills   if (zz == yy) {
678df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
679df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
6805f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
681df555b71SRichard Tran Mills   } else {
682df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
683df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
6845f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
6855f80ce2aSJacob Faibussowitsch     for (i=0; i<n; i++) z[i] += y[i];
686df555b71SRichard Tran Mills   }
687df555b71SRichard Tran Mills 
6889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*a->nz));
6899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
691df555b71SRichard Tran Mills   PetscFunctionReturn(0);
692df555b71SRichard Tran Mills }
693d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
694df555b71SRichard Tran Mills 
695190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
6968a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
697190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
698431879ecSRichard Tran Mills {
6991495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
700431879ecSRichard Tran Mills   sparse_matrix_t     csrA,csrB,csrC;
701190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
702431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
703431879ecSRichard Tran Mills   PetscObjectState    state;
704431879ecSRichard Tran Mills 
705431879ecSRichard Tran Mills   PetscFunctionBegin;
706190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
707190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
708190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
709190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
710190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
711190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
712190ae7a4SRichard Tran Mills 
7139566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
7149566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7159566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
7169566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
717431879ecSRichard Tran Mills   csrA = a->csrA;
718431879ecSRichard Tran Mills   csrB = b->csrA;
719431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720431879ecSRichard Tran Mills 
721190ae7a4SRichard Tran Mills   if (csrA && csrB) {
7225f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
723190ae7a4SRichard Tran Mills   } else {
724190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
725190ae7a4SRichard Tran Mills   }
726190ae7a4SRichard Tran Mills 
7279566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C));
728431879ecSRichard Tran Mills 
729431879ecSRichard Tran Mills   PetscFunctionReturn(0);
730431879ecSRichard Tran Mills }
731431879ecSRichard Tran Mills 
732190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
733e8be1fc7SRichard Tran Mills {
7341495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
735e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
736e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
737e8be1fc7SRichard Tran Mills   PetscObjectState    state;
738e8be1fc7SRichard Tran Mills 
739e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7409566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
7419566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7429566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
7439566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
744e8be1fc7SRichard Tran Mills   csrA = a->csrA;
745e8be1fc7SRichard Tran Mills   csrB = b->csrA;
746e8be1fc7SRichard Tran Mills   csrC = c->csrA;
747e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
748e8be1fc7SRichard Tran Mills 
749190ae7a4SRichard Tran Mills   if (csrA && csrB) {
7505f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
751190ae7a4SRichard Tran Mills   } else {
752190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
753190ae7a4SRichard Tran Mills   }
754e8be1fc7SRichard Tran Mills 
755e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7569566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
757e8be1fc7SRichard Tran Mills 
758e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
759e8be1fc7SRichard Tran Mills }
760e8be1fc7SRichard Tran Mills 
761190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
7624f53af40SRichard Tran Mills {
763190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7649566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
765190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
766190ae7a4SRichard Tran Mills }
767190ae7a4SRichard Tran Mills 
768190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
769190ae7a4SRichard Tran Mills {
770190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7719566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
772190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
773190ae7a4SRichard Tran Mills }
774190ae7a4SRichard Tran Mills 
775190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
776190ae7a4SRichard Tran Mills {
777190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7789566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
779190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
780190ae7a4SRichard Tran Mills }
781190ae7a4SRichard Tran Mills 
782190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
783190ae7a4SRichard Tran Mills {
784190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7859566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
786190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
787190ae7a4SRichard Tran Mills }
788190ae7a4SRichard Tran Mills 
789190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
790190ae7a4SRichard Tran Mills {
791190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7929566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
793190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
794190ae7a4SRichard Tran Mills }
795190ae7a4SRichard Tran Mills 
796190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
797190ae7a4SRichard Tran Mills {
798190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7999566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
800190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
801190ae7a4SRichard Tran Mills }
802190ae7a4SRichard Tran Mills 
803190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
804190ae7a4SRichard Tran Mills {
805190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
806190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
807190ae7a4SRichard Tran Mills 
808190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8099566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C));
810190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
811190ae7a4SRichard Tran Mills }
812190ae7a4SRichard Tran Mills 
813190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
814190ae7a4SRichard Tran Mills {
815190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
816190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
817190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
818190ae7a4SRichard Tran Mills 
819190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8209566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C));
821190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
822190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
823190ae7a4SRichard Tran Mills }
824190ae7a4SRichard Tran Mills 
82549ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
826190ae7a4SRichard Tran Mills {
827190ae7a4SRichard Tran Mills   Mat                 Ct;
828190ae7a4SRichard Tran Mills   Vec                 zeros;
8291495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
8304f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8314f53af40SRichard Tran Mills   PetscBool           set, flag;
832b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8334f53af40SRichard Tran Mills   PetscObjectState    state;
8344f53af40SRichard Tran Mills 
8354f53af40SRichard Tran Mills   PetscFunctionBegin;
8369566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A,&set,&flag));
83708401ef6SPierre Jolivet   PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8384f53af40SRichard Tran Mills 
8399566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
8409566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8419566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
8429566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8434f53af40SRichard Tran Mills   csrA = a->csrA;
8444f53af40SRichard Tran Mills   csrP = p->csrA;
8454f53af40SRichard Tran Mills   csrC = c->csrA;
846b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
847190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
848b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8494f53af40SRichard Tran Mills 
850f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
8515f80ce2aSJacob Faibussowitsch   PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
8524f53af40SRichard Tran Mills 
853190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
854190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
855190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8567301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
857190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
858190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
859190ae7a4SRichard Tran Mills    * the full matrix. */
8609566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8619566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct));
8629566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C,&zeros,NULL));
8639566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8649566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8659566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES));
8669566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN));
867190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
8689566063dSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C));
8699566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
8709566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8719566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8729566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8734f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8744f53af40SRichard Tran Mills }
875190ae7a4SRichard Tran Mills 
876190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
877190ae7a4SRichard Tran Mills {
878190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
879190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
8801495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
881190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
882190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
883190ae7a4SRichard Tran Mills   PetscObjectState    state;
884190ae7a4SRichard Tran Mills 
885190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8869566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
8879566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8889566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
8899566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
890190ae7a4SRichard Tran Mills   csrA = a->csrA;
891190ae7a4SRichard Tran Mills   csrP = p->csrA;
892190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
893190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
894190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
895190ae7a4SRichard Tran Mills 
896190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
897190ae7a4SRichard Tran Mills   if (csrP && csrA) {
8985f80ce2aSJacob Faibussowitsch     PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
899190ae7a4SRichard Tran Mills   } else {
900190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
901190ae7a4SRichard Tran Mills   }
902190ae7a4SRichard Tran Mills 
903190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
904190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
90549ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
90649ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
90749ba5396SRichard Tran Mills    * is guaranteed. */
9089566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C));
909190ae7a4SRichard Tran Mills 
910190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
911190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
912190ae7a4SRichard Tran Mills }
913190ae7a4SRichard Tran Mills 
914190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
915190ae7a4SRichard Tran Mills {
916190ae7a4SRichard Tran Mills   PetscFunctionBegin;
917190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
918190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
919190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
920190ae7a4SRichard Tran Mills }
921190ae7a4SRichard Tran Mills 
922190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
923190ae7a4SRichard Tran Mills {
924190ae7a4SRichard Tran Mills   PetscFunctionBegin;
925190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
926190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
927190ae7a4SRichard Tran Mills }
928190ae7a4SRichard Tran Mills 
929190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
930190ae7a4SRichard Tran Mills {
931190ae7a4SRichard Tran Mills   PetscFunctionBegin;
932190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
933190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
934190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
935190ae7a4SRichard Tran Mills }
936190ae7a4SRichard Tran Mills 
937190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
938190ae7a4SRichard Tran Mills {
939190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
940190ae7a4SRichard Tran Mills   Mat            A = product->A;
941190ae7a4SRichard Tran Mills   PetscBool      set, flag;
942190ae7a4SRichard Tran Mills 
943190ae7a4SRichard Tran Mills   PetscFunctionBegin;
944a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9452ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9462ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
947190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9482ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
949190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
950a3d67537SPierre Jolivet   } else {
951190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9529566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A,&set,&flag));
953a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
954a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9551495fedeSRichard Tran Mills     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
956190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
957a3d67537SPierre Jolivet   }
958190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
959190ae7a4SRichard Tran Mills }
960190ae7a4SRichard Tran Mills 
961190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
962190ae7a4SRichard Tran Mills {
963190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9642ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
965190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
966190ae7a4SRichard Tran Mills }
967190ae7a4SRichard Tran Mills 
968190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
969190ae7a4SRichard Tran Mills {
970190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9712ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
972190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
973190ae7a4SRichard Tran Mills }
974190ae7a4SRichard Tran Mills 
975190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
976190ae7a4SRichard Tran Mills {
977190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
978190ae7a4SRichard Tran Mills 
979190ae7a4SRichard Tran Mills   PetscFunctionBegin;
980190ae7a4SRichard Tran Mills   switch (product->type) {
981190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
9829566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
983190ae7a4SRichard Tran Mills     break;
984190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
9859566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
986190ae7a4SRichard Tran Mills     break;
987190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
9889566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
989190ae7a4SRichard Tran Mills     break;
990190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
9919566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
992190ae7a4SRichard Tran Mills     break;
993190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
9949566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
995190ae7a4SRichard Tran Mills     break;
996190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
9979566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
998190ae7a4SRichard Tran Mills     break;
999190ae7a4SRichard Tran Mills   default:
1000190ae7a4SRichard Tran Mills     break;
1001190ae7a4SRichard Tran Mills   }
1002190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1003190ae7a4SRichard Tran Mills }
1004431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1005190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
10064f53af40SRichard Tran Mills 
10074a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1008510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
10094a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
10104a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
10114a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
10124a2a386eSRichard Tran Mills {
10134a2a386eSRichard Tran Mills   Mat            B = *newmat;
10144a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1015c9d46305SRichard Tran Mills   PetscBool      set;
1016e9c94282SRichard Tran Mills   PetscBool      sametype;
10174a2a386eSRichard Tran Mills 
10184a2a386eSRichard Tran Mills   PetscFunctionBegin;
10199566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
10204a2a386eSRichard Tran Mills 
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1022e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1023e9c94282SRichard Tran Mills 
10249566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B,&aijmkl));
10254a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
10264a2a386eSRichard Tran Mills 
1027df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1028969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
10294a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
10304a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
10314a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1032c9d46305SRichard Tran Mills 
10334abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1036a8327b06SKarl Rupp #else
1037d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1038d995685eSRichard Tran Mills #endif
10395b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10404abfa3b3SRichard Tran Mills 
10414abfa3b3SRichard Tran Mills   /* Parse command line options. */
1042d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
10439566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set));
10449566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set));
1045d0609cedSBarry Smith   PetscOptionsEnd();
1046ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1047d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10489566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1049d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1050d995685eSRichard Tran Mills   }
1051d995685eSRichard Tran Mills #endif
1052c9d46305SRichard Tran Mills 
1053ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1054df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1055969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1056df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1057969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10588a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1059190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1060190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1061190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1062190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1063190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1064ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
106549ba5396SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1066190ae7a4SRichard Tran Mills #   else
1067190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
10684f53af40SRichard Tran Mills #   endif
1069e8be1fc7SRichard Tran Mills # endif
10701950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10711950a7e7SRichard Tran Mills 
1072213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1073213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1074213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1075213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1076213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10771950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10784a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1079969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10804a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1081969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1082c9d46305SRichard Tran Mills   }
10831950a7e7SRichard Tran Mills #endif
10844a2a386eSRichard Tran Mills 
10859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ));
10864a2a386eSRichard Tran Mills 
10879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL));
10884a2a386eSRichard Tran Mills   *newmat = B;
10894a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10904a2a386eSRichard Tran Mills }
10914a2a386eSRichard Tran Mills 
10924a2a386eSRichard Tran Mills /*@C
10934a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
10944a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
10954a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
109690147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
109790147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1098597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1099597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1100597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
110190147e49SRichard Tran Mills 
1102d083f849SBarry Smith    Collective
11034a2a386eSRichard Tran Mills 
11044a2a386eSRichard Tran Mills    Input Parameters:
11054a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
11064a2a386eSRichard Tran Mills .  m - number of rows
11074a2a386eSRichard Tran Mills .  n - number of columns
11084a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
11094a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
11104a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
11114a2a386eSRichard Tran Mills 
11124a2a386eSRichard Tran Mills    Output Parameter:
11134a2a386eSRichard Tran Mills .  A - the matrix
11144a2a386eSRichard Tran Mills 
111590147e49SRichard Tran Mills    Options Database Keys:
111666b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
111766b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
111890147e49SRichard Tran Mills 
11194a2a386eSRichard Tran Mills    Notes:
11204a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
11214a2a386eSRichard Tran Mills 
11224a2a386eSRichard Tran Mills    Level: intermediate
11234a2a386eSRichard Tran Mills 
1124db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
11254a2a386eSRichard Tran Mills @*/
11264a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
11274a2a386eSRichard Tran Mills {
11284a2a386eSRichard Tran Mills   PetscFunctionBegin;
11299566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,A));
11309566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A,m,n,m,n));
11319566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A,MATSEQAIJMKL));
11329566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
11334a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11344a2a386eSRichard Tran Mills }
11354a2a386eSRichard Tran Mills 
11364a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
11374a2a386eSRichard Tran Mills {
11384a2a386eSRichard Tran Mills   PetscFunctionBegin;
11399566063dSJacob Faibussowitsch   PetscCall(MatSetType(A,MATSEQAIJ));
11409566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
11414a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11424a2a386eSRichard Tran Mills }
1143