xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 362febeeeb69b91ebadcb4b2dc0a22cb6dfc4097)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35c1d5218aSRichard Tran Mills #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
554f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
584222ddf1SHong Zhang 
59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
604abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
62e9c94282SRichard Tran Mills    * the spptr pointer. */
63a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
64a8327b06SKarl Rupp 
654abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
660632b357SRichard Tran Mills     sparse_status_t stat;
674abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
68db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
694abfa3b3SRichard Tran Mills   }
70ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
71e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
724a2a386eSRichard Tran Mills 
734a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
744a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
754a2a386eSRichard Tran Mills 
764a2a386eSRichard Tran Mills   *newmat = B;
774a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
784a2a386eSRichard Tran Mills }
794a2a386eSRichard Tran Mills 
804a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
814a2a386eSRichard Tran Mills {
824a2a386eSRichard Tran Mills   PetscErrorCode ierr;
834a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
844a2a386eSRichard Tran Mills 
854a2a386eSRichard Tran Mills   PetscFunctionBegin;
86e9c94282SRichard Tran Mills 
87edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
88e9c94282SRichard Tran Mills   if (aijmkl) {
894a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
90ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
914abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
924abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
934abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
94db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
954abfa3b3SRichard Tran Mills     }
964abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
974a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
98e9c94282SRichard Tran Mills   }
994a2a386eSRichard Tran Mills 
1004a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1014a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1024a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1034a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1044a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1054a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1064a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1074a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1084a2a386eSRichard Tran Mills }
1094a2a386eSRichard Tran Mills 
110190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1115b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1125b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1135b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1145b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1155b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1165b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1176e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1184a2a386eSRichard Tran Mills {
119ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1206e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1216e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1226e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
12345fbe478SRichard Tran Mills   PetscFunctionBegin;
1246e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1256e369cd5SRichard Tran Mills #else
126a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
127a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
128a8327b06SKarl Rupp   PetscInt         m,n;
129a8327b06SKarl Rupp   MatScalar        *aa;
130a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1316e369cd5SRichard Tran Mills   sparse_status_t  stat;
132551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1334a2a386eSRichard Tran Mills 
134a8327b06SKarl Rupp   PetscFunctionBegin;
135e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
136e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
137e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
138e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
139e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1406e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1414d51fa23SRichard Tran Mills #endif
1426e369cd5SRichard Tran Mills 
1430632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1440632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1450632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1460632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
147db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
1480632b357SRichard Tran Mills   }
1498d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1506e369cd5SRichard Tran Mills 
151c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
152df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
153df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
154df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
15558678438SRichard Tran Mills   m = A->rmap->n;
15658678438SRichard Tran Mills   n = A->cmap->n;
157df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
158df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
159df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1601495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1618d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1628d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
16358678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
164db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
165df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
166db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
167df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
168db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
1691950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
170df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
171db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
1721950a7e7SRichard Tran Mills     }
1734abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
174e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
175190ae7a4SRichard Tran Mills   } else {
176190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
177c9d46305SRichard Tran Mills   }
1786e369cd5SRichard Tran Mills 
1796e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
180d995685eSRichard Tran Mills #endif
1816e369cd5SRichard Tran Mills }
1826e369cd5SRichard Tran Mills 
183b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
184190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
185190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
18619afcda9SRichard Tran Mills {
18719afcda9SRichard Tran Mills   PetscErrorCode      ierr;
18819afcda9SRichard Tran Mills   sparse_status_t     stat;
18919afcda9SRichard Tran Mills   sparse_index_base_t indexing;
190190ae7a4SRichard Tran Mills   PetscInt            m,n;
19145fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
19219afcda9SRichard Tran Mills   MatScalar           *aa;
19319afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
19419afcda9SRichard Tran Mills 
195*362febeeSStefano Zampini   PetscFunctionBegin;
196190ae7a4SRichard Tran Mills   if (csrA) {
19745fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
198190ae7a4SRichard Tran Mills     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
1999c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
200190ae7a4SRichard Tran Mills     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
201190ae7a4SRichard Tran Mills   } else {
202190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
203190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
204aab60f1bSRichard Tran Mills   }
205190ae7a4SRichard Tran Mills 
20619afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
20745fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
208aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
209aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
210aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
211aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
212190ae7a4SRichard Tran Mills   if (csrA) {
213190ae7a4SRichard Tran Mills     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
214190ae7a4SRichard Tran Mills   } else {
215190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
216190ae7a4SRichard Tran Mills     ierr = MatSetUp(A);CHKERRQ(ierr);
217190ae7a4SRichard Tran Mills     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
218190ae7a4SRichard Tran Mills     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
219190ae7a4SRichard Tran Mills   }
22019afcda9SRichard Tran Mills 
22119afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22219afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22319afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2246c87cf42SRichard Tran Mills 
22519afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
22619afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2276c87cf42SRichard Tran Mills 
22819afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
22919afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23019afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
231f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
232f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
233f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
234190ae7a4SRichard Tran Mills   if (csrA) {
23519afcda9SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
236db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
23719afcda9SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
238db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
2391950a7e7SRichard Tran Mills   }
240e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
24119afcda9SRichard Tran Mills   PetscFunctionReturn(0);
24219afcda9SRichard Tran Mills }
243b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
244190ae7a4SRichard Tran Mills 
245e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
246e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
247e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
248b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
249190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
250e8be1fc7SRichard Tran Mills {
251e8be1fc7SRichard Tran Mills   PetscInt            i;
252e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
253e8be1fc7SRichard Tran Mills   PetscInt            nz;
254e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
255e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
256e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
2571495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
258e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
259e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
260e8be1fc7SRichard Tran Mills 
261*362febeeSStefano Zampini   PetscFunctionBegin;
262190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
263190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
264190ae7a4SRichard Tran Mills 
265e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
266e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
267e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
268e8be1fc7SRichard Tran Mills 
269e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
270e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
271e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
272e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
273e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
274e8be1fc7SRichard Tran Mills   }
275e8be1fc7SRichard Tran Mills 
276e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
277e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
278e8be1fc7SRichard Tran Mills 
279e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
280a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
281a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
282a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
283a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
284e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
285e8be1fc7SRichard Tran Mills }
286b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
287e8be1fc7SRichard Tran Mills 
2883849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2893849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
2903849ddb2SRichard Tran Mills {
2913849ddb2SRichard Tran Mills   PetscInt            i,j,k;
2923849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
2933849ddb2SRichard Tran Mills   PetscInt            nz;
2943849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
2953849ddb2SRichard Tran Mills   PetscScalar         *aa;
2963849ddb2SRichard Tran Mills   PetscErrorCode      ierr;
2971495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
2983849ddb2SRichard Tran Mills   sparse_status_t     stat;
2993849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
3003849ddb2SRichard Tran Mills 
301*362febeeSStefano Zampini   PetscFunctionBegin;
3023849ddb2SRichard Tran Mills   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
3033849ddb2SRichard Tran Mills 
3043849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
3053849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
3063849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
3073849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
3083849ddb2SRichard Tran Mills   }
3093849ddb2SRichard Tran Mills 
3103849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
3113849ddb2SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
3123849ddb2SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
3133849ddb2SRichard Tran Mills 
3143849ddb2SRichard Tran Mills   k = 0;
3153849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
3163849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
3173849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
3183849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
3193849ddb2SRichard Tran Mills       if (aa) {
3203f597eacSJose E. Roman         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr);
3213849ddb2SRichard Tran Mills       } else {
3223849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
3233849ddb2SRichard Tran Mills       }
3243849ddb2SRichard Tran Mills       k++;
3253849ddb2SRichard Tran Mills     }
3263849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
3273849ddb2SRichard Tran Mills   }
3283849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
3293849ddb2SRichard Tran Mills }
3303849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3313849ddb2SRichard Tran Mills 
3326e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3336e369cd5SRichard Tran Mills {
3346e369cd5SRichard Tran Mills   PetscErrorCode ierr;
3351495fedeSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3366e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3376e369cd5SRichard Tran Mills 
3386e369cd5SRichard Tran Mills   PetscFunctionBegin;
3396e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
3406e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
341580bdb30SBarry Smith   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
3426e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3435b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3446e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3455b49642aSRichard Tran Mills   }
3466e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3476e369cd5SRichard Tran Mills }
3486e369cd5SRichard Tran Mills 
3496e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3506e369cd5SRichard Tran Mills {
3516e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3526e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3535b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3546e369cd5SRichard Tran Mills 
3556e369cd5SRichard Tran Mills   PetscFunctionBegin;
3566e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3576e369cd5SRichard Tran Mills 
3586e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3596e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3606e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3616e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
362d96e85feSRichard Tran Mills    * a lot of code duplication. */
3636e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3646e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3656e369cd5SRichard Tran Mills 
3665b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3675b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3685b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3695b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3706e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3715b49642aSRichard Tran Mills   }
372df555b71SRichard Tran Mills 
3734a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3744a2a386eSRichard Tran Mills }
3754a2a386eSRichard Tran Mills 
376e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3774a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3784a2a386eSRichard Tran Mills {
3794a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3804a2a386eSRichard Tran Mills   const PetscScalar *x;
3814a2a386eSRichard Tran Mills   PetscScalar       *y;
3824a2a386eSRichard Tran Mills   const MatScalar   *aa;
3834a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3844a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
385db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
386db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
387db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3884a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
389db63039fSRichard Tran Mills   char              matdescra[6];
390db63039fSRichard Tran Mills 
3914a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
392ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
393ff03dc53SRichard Tran Mills 
394ff03dc53SRichard Tran Mills   PetscFunctionBegin;
395db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
396db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
397ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
398ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
400ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
401ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
402ff03dc53SRichard Tran Mills 
403ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
404db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
405ff03dc53SRichard Tran Mills 
406ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
407ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
408ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
409ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
410ff03dc53SRichard Tran Mills }
4111950a7e7SRichard Tran Mills #endif
412ff03dc53SRichard Tran Mills 
413ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
414df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
415df555b71SRichard Tran Mills {
416df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
417df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
418df555b71SRichard Tran Mills   const PetscScalar *x;
419df555b71SRichard Tran Mills   PetscScalar       *y;
420df555b71SRichard Tran Mills   PetscErrorCode    ierr;
421df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
422551aa5c8SRichard Tran Mills   PetscObjectState  state;
423df555b71SRichard Tran Mills 
424df555b71SRichard Tran Mills   PetscFunctionBegin;
425df555b71SRichard Tran Mills 
42638987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
42738987b35SRichard Tran Mills   if (!a->nz) {
42838987b35SRichard Tran Mills     PetscInt i;
42938987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
43038987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
43138987b35SRichard Tran Mills     for (i=0; i<m; i++) {
43238987b35SRichard Tran Mills       y[i] = 0.0;
43338987b35SRichard Tran Mills     }
43438987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
43538987b35SRichard Tran Mills     PetscFunctionReturn(0);
43638987b35SRichard Tran Mills   }
437f36dfe3fSRichard Tran Mills 
438df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
439df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
440df555b71SRichard Tran Mills 
4413fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4423fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4433fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
444551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
445551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4463fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4473fa15762SRichard Tran Mills   }
4483fa15762SRichard Tran Mills 
449df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
450df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
451db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
452df555b71SRichard Tran Mills 
453df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
454df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
455df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
456df555b71SRichard Tran Mills   PetscFunctionReturn(0);
457df555b71SRichard Tran Mills }
458d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
459df555b71SRichard Tran Mills 
460e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
461ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
462ff03dc53SRichard Tran Mills {
463ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
464ff03dc53SRichard Tran Mills   const PetscScalar *x;
465ff03dc53SRichard Tran Mills   PetscScalar       *y;
466ff03dc53SRichard Tran Mills   const MatScalar   *aa;
467ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
468ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
469db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
470db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
471db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
472ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
473db63039fSRichard Tran Mills   char              matdescra[6];
474ff03dc53SRichard Tran Mills 
475ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
476ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4774a2a386eSRichard Tran Mills 
4784a2a386eSRichard Tran Mills   PetscFunctionBegin;
479969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
480969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4814a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4824a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4834a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4844a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4854a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4864a2a386eSRichard Tran Mills 
4874a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
488db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4894a2a386eSRichard Tran Mills 
4904a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4914a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4924a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4934a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4944a2a386eSRichard Tran Mills }
4951950a7e7SRichard Tran Mills #endif
4964a2a386eSRichard Tran Mills 
497ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
498df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
499df555b71SRichard Tran Mills {
500df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
501df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
502df555b71SRichard Tran Mills   const PetscScalar *x;
503df555b71SRichard Tran Mills   PetscScalar       *y;
504df555b71SRichard Tran Mills   PetscErrorCode    ierr;
5050632b357SRichard Tran Mills   sparse_status_t   stat;
506551aa5c8SRichard Tran Mills   PetscObjectState  state;
507df555b71SRichard Tran Mills 
508df555b71SRichard Tran Mills   PetscFunctionBegin;
509df555b71SRichard Tran Mills 
51038987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
51138987b35SRichard Tran Mills   if (!a->nz) {
51238987b35SRichard Tran Mills     PetscInt i;
51338987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
51438987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
51538987b35SRichard Tran Mills     for (i=0; i<n; i++) {
51638987b35SRichard Tran Mills       y[i] = 0.0;
51738987b35SRichard Tran Mills     }
51838987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
51938987b35SRichard Tran Mills     PetscFunctionReturn(0);
52038987b35SRichard Tran Mills   }
521f36dfe3fSRichard Tran Mills 
522df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
523df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524df555b71SRichard Tran Mills 
5253fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5263fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5273fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
528551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
529551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5303fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5313fa15762SRichard Tran Mills   }
5323fa15762SRichard Tran Mills 
533df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
534df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
535db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
536df555b71SRichard Tran Mills 
537df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
538df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
539df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
540df555b71SRichard Tran Mills   PetscFunctionReturn(0);
541df555b71SRichard Tran Mills }
542d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
543df555b71SRichard Tran Mills 
544e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
5454a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
5464a2a386eSRichard Tran Mills {
5474a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5484a2a386eSRichard Tran Mills   const PetscScalar *x;
5494a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5504a2a386eSRichard Tran Mills   const MatScalar   *aa;
5514a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5524a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
553db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5544a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5554a2a386eSRichard Tran Mills   PetscInt          i;
5564a2a386eSRichard Tran Mills 
557ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
558ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
559a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
560db63039fSRichard Tran Mills   PetscScalar       beta;
561a84739b8SRichard Tran Mills   char              matdescra[6];
562ff03dc53SRichard Tran Mills 
563ff03dc53SRichard Tran Mills   PetscFunctionBegin;
564a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
565a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
566a84739b8SRichard Tran Mills 
567ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
568ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
569ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
570ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
571ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
572ff03dc53SRichard Tran Mills 
573ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
574a84739b8SRichard Tran Mills   if (zz == yy) {
575a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
576db63039fSRichard Tran Mills     beta = 1.0;
577db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
578a84739b8SRichard Tran Mills   } else {
579db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
580db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
581db63039fSRichard Tran Mills     beta = 0.0;
582db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
583ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
584ff03dc53SRichard Tran Mills       z[i] += y[i];
585ff03dc53SRichard Tran Mills     }
586a84739b8SRichard Tran Mills   }
587ff03dc53SRichard Tran Mills 
588ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
589ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
590ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
591ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
592ff03dc53SRichard Tran Mills }
5931950a7e7SRichard Tran Mills #endif
594ff03dc53SRichard Tran Mills 
595ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
596df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
597df555b71SRichard Tran Mills {
598df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
599df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
600df555b71SRichard Tran Mills   const PetscScalar *x;
601df555b71SRichard Tran Mills   PetscScalar       *y,*z;
602df555b71SRichard Tran Mills   PetscErrorCode    ierr;
603df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
604df555b71SRichard Tran Mills   PetscInt          i;
605df555b71SRichard Tran Mills 
606df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
607df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
608551aa5c8SRichard Tran Mills   PetscObjectState  state;
609df555b71SRichard Tran Mills 
610df555b71SRichard Tran Mills   PetscFunctionBegin;
611df555b71SRichard Tran Mills 
61238987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
61338987b35SRichard Tran Mills   if (!a->nz) {
61438987b35SRichard Tran Mills     PetscInt i;
61538987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
61638987b35SRichard Tran Mills     for (i=0; i<m; i++) {
61738987b35SRichard Tran Mills       z[i] = y[i];
61838987b35SRichard Tran Mills     }
61938987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62038987b35SRichard Tran Mills     PetscFunctionReturn(0);
62138987b35SRichard Tran Mills   }
622df555b71SRichard Tran Mills 
623df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
624df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
625df555b71SRichard Tran Mills 
6263fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6273fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6283fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
629551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
630551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6313fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6323fa15762SRichard Tran Mills   }
6333fa15762SRichard Tran Mills 
634df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
635df555b71SRichard Tran Mills   if (zz == yy) {
636df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
637df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
638db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
639db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
640df555b71SRichard Tran Mills   } else {
641df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
642df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
643db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
644db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
645df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
646df555b71SRichard Tran Mills       z[i] += y[i];
647df555b71SRichard Tran Mills     }
648df555b71SRichard Tran Mills   }
649df555b71SRichard Tran Mills 
650df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
651df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
652df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
653df555b71SRichard Tran Mills   PetscFunctionReturn(0);
654df555b71SRichard Tran Mills }
655d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
656df555b71SRichard Tran Mills 
657e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
658ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
659ff03dc53SRichard Tran Mills {
660ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
661ff03dc53SRichard Tran Mills   const PetscScalar *x;
662ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
663ff03dc53SRichard Tran Mills   const MatScalar   *aa;
664ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
665ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
666db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
667ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
668ff03dc53SRichard Tran Mills   PetscInt          i;
669ff03dc53SRichard Tran Mills 
670ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
671ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
672a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
673db63039fSRichard Tran Mills   PetscScalar       beta;
674a84739b8SRichard Tran Mills   char              matdescra[6];
6754a2a386eSRichard Tran Mills 
6764a2a386eSRichard Tran Mills   PetscFunctionBegin;
677a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
678a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
679a84739b8SRichard Tran Mills 
6804a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6814a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6824a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6834a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6844a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6854a2a386eSRichard Tran Mills 
6864a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
687a84739b8SRichard Tran Mills   if (zz == yy) {
688a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
689db63039fSRichard Tran Mills     beta = 1.0;
690969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
691a84739b8SRichard Tran Mills   } else {
692db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
693db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
694db63039fSRichard Tran Mills     beta = 0.0;
695db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
696969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6974a2a386eSRichard Tran Mills       z[i] += y[i];
6984a2a386eSRichard Tran Mills     }
699a84739b8SRichard Tran Mills   }
7004a2a386eSRichard Tran Mills 
7014a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
7024a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7034a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
7044a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7054a2a386eSRichard Tran Mills }
7061950a7e7SRichard Tran Mills #endif
7074a2a386eSRichard Tran Mills 
708ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
709df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
710df555b71SRichard Tran Mills {
711df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
712df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
713df555b71SRichard Tran Mills   const PetscScalar *x;
714df555b71SRichard Tran Mills   PetscScalar       *y,*z;
715df555b71SRichard Tran Mills   PetscErrorCode    ierr;
716969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
717df555b71SRichard Tran Mills   PetscInt          i;
718551aa5c8SRichard Tran Mills   PetscObjectState  state;
719df555b71SRichard Tran Mills 
720df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
721df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
722df555b71SRichard Tran Mills 
723df555b71SRichard Tran Mills   PetscFunctionBegin;
724df555b71SRichard Tran Mills 
72538987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
72638987b35SRichard Tran Mills   if (!a->nz) {
72738987b35SRichard Tran Mills     PetscInt i;
72838987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
72938987b35SRichard Tran Mills     for (i=0; i<n; i++) {
73038987b35SRichard Tran Mills       z[i] = y[i];
73138987b35SRichard Tran Mills     }
73238987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
73338987b35SRichard Tran Mills     PetscFunctionReturn(0);
73438987b35SRichard Tran Mills   }
735f36dfe3fSRichard Tran Mills 
736df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
737df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
738df555b71SRichard Tran Mills 
7393fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
7403fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
7413fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
742551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
743551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
7443fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
7453fa15762SRichard Tran Mills   }
7463fa15762SRichard Tran Mills 
747df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
748df555b71SRichard Tran Mills   if (zz == yy) {
749df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
750df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
751db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
752db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
753df555b71SRichard Tran Mills   } else {
754df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
755df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
756db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
757db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
758969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
759df555b71SRichard Tran Mills       z[i] += y[i];
760df555b71SRichard Tran Mills     }
761df555b71SRichard Tran Mills   }
762df555b71SRichard Tran Mills 
763df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
764df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
765df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
766df555b71SRichard Tran Mills   PetscFunctionReturn(0);
767df555b71SRichard Tran Mills }
768d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
769df555b71SRichard Tran Mills 
770190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
7718a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
772190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
773431879ecSRichard Tran Mills {
7741495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
775431879ecSRichard Tran Mills   sparse_matrix_t     csrA,csrB,csrC;
776190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
777431879ecSRichard Tran Mills   PetscErrorCode      ierr;
778431879ecSRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
779431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
780431879ecSRichard Tran Mills   PetscObjectState    state;
781431879ecSRichard Tran Mills 
782431879ecSRichard Tran Mills   PetscFunctionBegin;
783190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
784190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
785190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
786190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
787190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
788190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
789190ae7a4SRichard Tran Mills 
790431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
791431879ecSRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
792431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
793431879ecSRichard Tran Mills   }
794431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
795431879ecSRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
796431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
797431879ecSRichard Tran Mills   }
798431879ecSRichard Tran Mills   csrA = a->csrA;
799431879ecSRichard Tran Mills   csrB = b->csrA;
800431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
801431879ecSRichard Tran Mills 
802190ae7a4SRichard Tran Mills   if (csrA && csrB) {
803190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
804db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
805190ae7a4SRichard Tran Mills   } else {
806190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
807190ae7a4SRichard Tran Mills   }
808190ae7a4SRichard Tran Mills 
809190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
810431879ecSRichard Tran Mills 
811431879ecSRichard Tran Mills   PetscFunctionReturn(0);
812431879ecSRichard Tran Mills }
813431879ecSRichard Tran Mills 
814190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
815e8be1fc7SRichard Tran Mills {
8161495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
817e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
818e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
819e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
820e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
821e8be1fc7SRichard Tran Mills   PetscObjectState    state;
822e8be1fc7SRichard Tran Mills 
823e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
824e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
825e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
826e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
827e8be1fc7SRichard Tran Mills   }
828e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
829e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
830e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
831e8be1fc7SRichard Tran Mills   }
832e8be1fc7SRichard Tran Mills   csrA = a->csrA;
833e8be1fc7SRichard Tran Mills   csrB = b->csrA;
834e8be1fc7SRichard Tran Mills   csrC = c->csrA;
835e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
836e8be1fc7SRichard Tran Mills 
837190ae7a4SRichard Tran Mills   if (csrA && csrB) {
838190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
839db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
840190ae7a4SRichard Tran Mills   } else {
841190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
842190ae7a4SRichard Tran Mills   }
843e8be1fc7SRichard Tran Mills 
844e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8454f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
846e8be1fc7SRichard Tran Mills 
847e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
848e8be1fc7SRichard Tran Mills }
849e8be1fc7SRichard Tran Mills 
850190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
8514f53af40SRichard Tran Mills {
852190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
853190ae7a4SRichard Tran Mills 
854190ae7a4SRichard Tran Mills   PetscFunctionBegin;
855190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
856190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
857190ae7a4SRichard Tran Mills }
858190ae7a4SRichard Tran Mills 
859190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
860190ae7a4SRichard Tran Mills {
861190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
862190ae7a4SRichard Tran Mills 
863190ae7a4SRichard Tran Mills   PetscFunctionBegin;
864190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
865190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
866190ae7a4SRichard Tran Mills }
867190ae7a4SRichard Tran Mills 
868190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
869190ae7a4SRichard Tran Mills {
870190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
871190ae7a4SRichard Tran Mills 
872190ae7a4SRichard Tran Mills   PetscFunctionBegin;
873190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
874190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
875190ae7a4SRichard Tran Mills }
876190ae7a4SRichard Tran Mills 
877190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
878190ae7a4SRichard Tran Mills {
879190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
880190ae7a4SRichard Tran Mills 
881190ae7a4SRichard Tran Mills   PetscFunctionBegin;
882190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
883190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
884190ae7a4SRichard Tran Mills }
885190ae7a4SRichard Tran Mills 
886190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
887190ae7a4SRichard Tran Mills {
888190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
889190ae7a4SRichard Tran Mills 
890190ae7a4SRichard Tran Mills   PetscFunctionBegin;
891190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
892190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
893190ae7a4SRichard Tran Mills }
894190ae7a4SRichard Tran Mills 
895190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
896190ae7a4SRichard Tran Mills {
897190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
898190ae7a4SRichard Tran Mills 
899190ae7a4SRichard Tran Mills   PetscFunctionBegin;
900190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
901190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
902190ae7a4SRichard Tran Mills }
903190ae7a4SRichard Tran Mills 
904190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
905190ae7a4SRichard Tran Mills {
906190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
907190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
908190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
909190ae7a4SRichard Tran Mills 
910190ae7a4SRichard Tran Mills   PetscFunctionBegin;
911190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
912190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
913190ae7a4SRichard Tran Mills }
914190ae7a4SRichard Tran Mills 
915190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
916190ae7a4SRichard Tran Mills {
917190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
918190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
919190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
920190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
921190ae7a4SRichard Tran Mills 
922190ae7a4SRichard Tran Mills   PetscFunctionBegin;
923190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
924190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
925190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
926190ae7a4SRichard Tran Mills }
927190ae7a4SRichard Tran Mills 
92849ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
929190ae7a4SRichard Tran Mills {
930190ae7a4SRichard Tran Mills   Mat                 Ct;
931190ae7a4SRichard Tran Mills   Vec                 zeros;
9321495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
9334f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
9344f53af40SRichard Tran Mills   PetscBool           set, flag;
9354f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
936b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
9374f53af40SRichard Tran Mills   PetscObjectState    state;
9384f53af40SRichard Tran Mills   PetscErrorCode      ierr;
9394f53af40SRichard Tran Mills 
9404f53af40SRichard Tran Mills   PetscFunctionBegin;
9414f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
94237f0d54fSRichard Tran Mills   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
9434f53af40SRichard Tran Mills 
9444f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
9454f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
9464f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
9474f53af40SRichard Tran Mills   }
9484f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
9494f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
9504f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
9514f53af40SRichard Tran Mills   }
9524f53af40SRichard Tran Mills   csrA = a->csrA;
9534f53af40SRichard Tran Mills   csrP = p->csrA;
9544f53af40SRichard Tran Mills   csrC = c->csrA;
955b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
956190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
957b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
9584f53af40SRichard Tran Mills 
959f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
960b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
961db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
9624f53af40SRichard Tran Mills 
963190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
964190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
965190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
966190ae7a4SRichard Tran Mills    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
967190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
968190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
969190ae7a4SRichard Tran Mills    * the full matrix. */
9704f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
971190ae7a4SRichard Tran Mills   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
972190ae7a4SRichard Tran Mills   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
973190ae7a4SRichard Tran Mills   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
974190ae7a4SRichard Tran Mills   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
975190ae7a4SRichard Tran Mills   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
976190ae7a4SRichard Tran Mills   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
977190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
978190ae7a4SRichard Tran Mills   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
9791495fedeSRichard Tran Mills   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
980190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
981190ae7a4SRichard Tran Mills   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
982190ae7a4SRichard Tran Mills   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
9834f53af40SRichard Tran Mills   PetscFunctionReturn(0);
9844f53af40SRichard Tran Mills }
985190ae7a4SRichard Tran Mills 
986190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
987190ae7a4SRichard Tran Mills {
988190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
989190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
9901495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
991190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
992190ae7a4SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
993190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
994190ae7a4SRichard Tran Mills   PetscObjectState    state;
995190ae7a4SRichard Tran Mills   PetscErrorCode      ierr;
996190ae7a4SRichard Tran Mills 
997190ae7a4SRichard Tran Mills   PetscFunctionBegin;
998190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
999190ae7a4SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
1000190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
1001190ae7a4SRichard Tran Mills   }
1002190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1003190ae7a4SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
1004190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
1005190ae7a4SRichard Tran Mills   }
1006190ae7a4SRichard Tran Mills   csrA = a->csrA;
1007190ae7a4SRichard Tran Mills   csrP = p->csrA;
1008190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1009190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1010190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1011190ae7a4SRichard Tran Mills 
1012190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1013190ae7a4SRichard Tran Mills   if (csrP && csrA) {
1014190ae7a4SRichard Tran Mills     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1015db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1016190ae7a4SRichard Tran Mills   } else {
1017190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
1018190ae7a4SRichard Tran Mills   }
1019190ae7a4SRichard Tran Mills 
1020190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1021190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
102249ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
102349ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
102449ba5396SRichard Tran Mills    * is guaranteed. */
1025190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1026190ae7a4SRichard Tran Mills 
1027190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
1028190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1029190ae7a4SRichard Tran Mills }
1030190ae7a4SRichard Tran Mills 
1031190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1032190ae7a4SRichard Tran Mills {
1033190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1034190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
1035190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1036190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1037190ae7a4SRichard Tran Mills }
1038190ae7a4SRichard Tran Mills 
1039190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1040190ae7a4SRichard Tran Mills {
1041190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1042190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1043190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1044190ae7a4SRichard Tran Mills }
1045190ae7a4SRichard Tran Mills 
1046190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1047190ae7a4SRichard Tran Mills {
1048190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1049190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1050190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1051190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1052190ae7a4SRichard Tran Mills }
1053190ae7a4SRichard Tran Mills 
1054190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1055190ae7a4SRichard Tran Mills {
1056190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1057190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1058190ae7a4SRichard Tran Mills   Mat            A = product->A;
1059190ae7a4SRichard Tran Mills   PetscBool      set, flag;
1060190ae7a4SRichard Tran Mills 
1061190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1062a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
10632ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
10642ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
1065190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
10662ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
1067190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
1068a3d67537SPierre Jolivet   } else {
1069190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1070190ae7a4SRichard Tran Mills     ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1071a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1072a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
10731495fedeSRichard Tran Mills     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1074190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1075a3d67537SPierre Jolivet   }
1076190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1077190ae7a4SRichard Tran Mills }
1078190ae7a4SRichard Tran Mills 
1079190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1080190ae7a4SRichard Tran Mills {
1081190ae7a4SRichard Tran Mills   PetscFunctionBegin;
10822ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1083190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1084190ae7a4SRichard Tran Mills }
1085190ae7a4SRichard Tran Mills 
1086190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1087190ae7a4SRichard Tran Mills {
1088190ae7a4SRichard Tran Mills   PetscFunctionBegin;
10892ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1090190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1091190ae7a4SRichard Tran Mills }
1092190ae7a4SRichard Tran Mills 
1093190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1094190ae7a4SRichard Tran Mills {
1095190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1096190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1097190ae7a4SRichard Tran Mills 
1098190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1099190ae7a4SRichard Tran Mills   switch (product->type) {
1100190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
1101190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1102190ae7a4SRichard Tran Mills     break;
1103190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
1104190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1105190ae7a4SRichard Tran Mills     break;
1106190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
1107190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1108190ae7a4SRichard Tran Mills     break;
1109190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
1110190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1111190ae7a4SRichard Tran Mills     break;
1112190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
1113190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1114190ae7a4SRichard Tran Mills     break;
1115190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
1116190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1117190ae7a4SRichard Tran Mills     break;
1118190ae7a4SRichard Tran Mills   default:
1119190ae7a4SRichard Tran Mills     break;
1120190ae7a4SRichard Tran Mills   }
1121190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1122190ae7a4SRichard Tran Mills }
1123431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1124190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
11254f53af40SRichard Tran Mills 
11264a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1127510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
11284a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
11294a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
11304a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
11314a2a386eSRichard Tran Mills {
11324a2a386eSRichard Tran Mills   PetscErrorCode ierr;
11334a2a386eSRichard Tran Mills   Mat            B = *newmat;
11344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1135c9d46305SRichard Tran Mills   PetscBool      set;
1136e9c94282SRichard Tran Mills   PetscBool      sametype;
11374a2a386eSRichard Tran Mills 
11384a2a386eSRichard Tran Mills   PetscFunctionBegin;
11394a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
11404a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
11414a2a386eSRichard Tran Mills   }
11424a2a386eSRichard Tran Mills 
1143e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1144e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1145e9c94282SRichard Tran Mills 
11464a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
11474a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
11484a2a386eSRichard Tran Mills 
1149df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1150969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
11514a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
11524a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
11534a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1154c9d46305SRichard Tran Mills 
11554abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1156ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1157d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1158a8327b06SKarl Rupp #else
1159d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1160d995685eSRichard Tran Mills #endif
11615b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
11624abfa3b3SRichard Tran Mills 
11634abfa3b3SRichard Tran Mills   /* Parse command line options. */
1164c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
116548292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
116648292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1167c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1168ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1169d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
1170d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1171d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1172d995685eSRichard Tran Mills   }
1173d995685eSRichard Tran Mills #endif
1174c9d46305SRichard Tran Mills 
1175ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1176df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1177969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1178df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1179969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
11808a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1181190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1182190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1183190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1184190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1185190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1186ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
118749ba5396SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1188190ae7a4SRichard Tran Mills #   else
1189190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
11904f53af40SRichard Tran Mills #   endif
1191e8be1fc7SRichard Tran Mills # endif
11921950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
11931950a7e7SRichard Tran Mills 
1194213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1195213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1196213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1197213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1198213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
11991950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
12004a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1201969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
12024a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1203969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1204c9d46305SRichard Tran Mills   }
12051950a7e7SRichard Tran Mills #endif
12064a2a386eSRichard Tran Mills 
12074a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
12084a2a386eSRichard Tran Mills 
12094a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
12104a2a386eSRichard Tran Mills   *newmat = B;
12114a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12124a2a386eSRichard Tran Mills }
12134a2a386eSRichard Tran Mills 
12144a2a386eSRichard Tran Mills /*@C
12154a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
12164a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
12174a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
121890147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
121990147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1220597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1221597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1222597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
122390147e49SRichard Tran Mills 
1224d083f849SBarry Smith    Collective
12254a2a386eSRichard Tran Mills 
12264a2a386eSRichard Tran Mills    Input Parameters:
12274a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
12284a2a386eSRichard Tran Mills .  m - number of rows
12294a2a386eSRichard Tran Mills .  n - number of columns
12304a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
12314a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
12324a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
12334a2a386eSRichard Tran Mills 
12344a2a386eSRichard Tran Mills    Output Parameter:
12354a2a386eSRichard Tran Mills .  A - the matrix
12364a2a386eSRichard Tran Mills 
123790147e49SRichard Tran Mills    Options Database Keys:
123866b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
123966b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
124090147e49SRichard Tran Mills 
12414a2a386eSRichard Tran Mills    Notes:
12424a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
12434a2a386eSRichard Tran Mills 
12444a2a386eSRichard Tran Mills    Level: intermediate
12454a2a386eSRichard Tran Mills 
12464a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
12474a2a386eSRichard Tran Mills @*/
12484a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
12494a2a386eSRichard Tran Mills {
12504a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12514a2a386eSRichard Tran Mills 
12524a2a386eSRichard Tran Mills   PetscFunctionBegin;
12534a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
12544a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
12554a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
12564a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
12574a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12584a2a386eSRichard Tran Mills }
12594a2a386eSRichard Tran Mills 
12604a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
12614a2a386eSRichard Tran Mills {
12624a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12634a2a386eSRichard Tran Mills 
12644a2a386eSRichard Tran Mills   PetscFunctionBegin;
12654a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
12664a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
12674a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12684a2a386eSRichard Tran Mills }
1269