xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision edc89de7afbff9901e161544f070319e05d07b08)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35c1d5218aSRichard Tran Mills #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
554f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
584222ddf1SHong Zhang 
59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60190ae7a4SRichard Tran Mills   if (!aijmkl->no_SpMV2) {
61190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
64190ae7a4SRichard Tran Mills   }
65190ae7a4SRichard Tran Mills 
664abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
68e9c94282SRichard Tran Mills    * the spptr pointer. */
69a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70a8327b06SKarl Rupp 
714abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
720632b357SRichard Tran Mills     sparse_status_t stat;
734abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
74db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
754abfa3b3SRichard Tran Mills   }
76ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
77e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
804a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
814a2a386eSRichard Tran Mills 
824a2a386eSRichard Tran Mills   *newmat = B;
834a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
844a2a386eSRichard Tran Mills }
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
874a2a386eSRichard Tran Mills {
884a2a386eSRichard Tran Mills   PetscErrorCode ierr;
894a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
904a2a386eSRichard Tran Mills 
914a2a386eSRichard Tran Mills   PetscFunctionBegin;
92e9c94282SRichard Tran Mills 
93*edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
94e9c94282SRichard Tran Mills   if (aijmkl) {
954a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
96ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
974abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
984abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
994abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
100db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
1014abfa3b3SRichard Tran Mills     }
1024abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1034a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
104e9c94282SRichard Tran Mills   }
1054a2a386eSRichard Tran Mills 
1064a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1074a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1084a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1094a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1104a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1114a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1124a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1134a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1144a2a386eSRichard Tran Mills }
1154a2a386eSRichard Tran Mills 
116190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1175b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1185b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1195b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1205b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1215b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1225b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1236e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1244a2a386eSRichard Tran Mills {
125ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1266e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1276e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1286e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
12945fbe478SRichard Tran Mills   PetscFunctionBegin;
1306e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1316e369cd5SRichard Tran Mills #else
132a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
133a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
134a8327b06SKarl Rupp   PetscInt         m,n;
135a8327b06SKarl Rupp   MatScalar        *aa;
136a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1376e369cd5SRichard Tran Mills   sparse_status_t  stat;
138551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1394a2a386eSRichard Tran Mills 
140a8327b06SKarl Rupp   PetscFunctionBegin;
141e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
142e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
143e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
144e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
145e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1466e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1474d51fa23SRichard Tran Mills #endif
1486e369cd5SRichard Tran Mills 
1490632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1500632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1510632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1520632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
153db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
1540632b357SRichard Tran Mills   }
1558d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1566e369cd5SRichard Tran Mills 
157c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
158df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
16158678438SRichard Tran Mills   m = A->rmap->n;
16258678438SRichard Tran Mills   n = A->cmap->n;
163df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
164df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
165df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1661495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1678d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1688d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
16958678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
170db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
171df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
173df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
174db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
1751950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
176df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
177db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
1781950a7e7SRichard Tran Mills     }
1794abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
180e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
181190ae7a4SRichard Tran Mills   } else {
182190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
183c9d46305SRichard Tran Mills   }
1846e369cd5SRichard Tran Mills 
1856e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
186d995685eSRichard Tran Mills #endif
1876e369cd5SRichard Tran Mills }
1886e369cd5SRichard Tran Mills 
189b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
190190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
191190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
19219afcda9SRichard Tran Mills {
19319afcda9SRichard Tran Mills   PetscErrorCode      ierr;
19419afcda9SRichard Tran Mills   sparse_status_t     stat;
19519afcda9SRichard Tran Mills   sparse_index_base_t indexing;
196190ae7a4SRichard Tran Mills   PetscInt            m,n;
19745fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
19819afcda9SRichard Tran Mills   MatScalar           *aa;
19919afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
20019afcda9SRichard Tran Mills 
201190ae7a4SRichard Tran Mills   if (csrA) {
20245fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
203190ae7a4SRichard Tran Mills     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
2049c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
205190ae7a4SRichard Tran Mills     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
206190ae7a4SRichard Tran Mills   } else {
207190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
208190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
209aab60f1bSRichard Tran Mills   }
210190ae7a4SRichard Tran Mills 
21119afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
21245fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
213aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
216aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217190ae7a4SRichard Tran Mills   if (csrA) {
218190ae7a4SRichard Tran Mills     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
219190ae7a4SRichard Tran Mills   } else {
220190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
221190ae7a4SRichard Tran Mills     ierr = MatSetUp(A);CHKERRQ(ierr);
222190ae7a4SRichard Tran Mills     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
223190ae7a4SRichard Tran Mills     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224190ae7a4SRichard Tran Mills   }
22519afcda9SRichard Tran Mills 
22619afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22719afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22819afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2296c87cf42SRichard Tran Mills 
23019afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
23119afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2326c87cf42SRichard Tran Mills 
23319afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
23419afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23519afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
236f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
237f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
238f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
239190ae7a4SRichard Tran Mills   if (csrA) {
24019afcda9SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
241db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
24219afcda9SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
243db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
2441950a7e7SRichard Tran Mills   }
245e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
24619afcda9SRichard Tran Mills   PetscFunctionReturn(0);
24719afcda9SRichard Tran Mills }
248b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
249190ae7a4SRichard Tran Mills 
250e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
251e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
252e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
253b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
254190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
255e8be1fc7SRichard Tran Mills {
256e8be1fc7SRichard Tran Mills   PetscInt            i;
257e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
258e8be1fc7SRichard Tran Mills   PetscInt            nz;
259e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
260e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
261e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
2621495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
263e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
264e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
265e8be1fc7SRichard Tran Mills 
266190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
267190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
268190ae7a4SRichard Tran Mills 
269e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
270e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
271e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
272e8be1fc7SRichard Tran Mills 
273e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
274e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
275e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
276e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
277e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
278e8be1fc7SRichard Tran Mills   }
279e8be1fc7SRichard Tran Mills 
280e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
281e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
282e8be1fc7SRichard Tran Mills 
283e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
284e995cf24SRichard Tran Mills   /* We mark our matrix as having a valid, optimized MKL handle.
285e995cf24SRichard Tran Mills    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
286e995cf24SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
287e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
288e8be1fc7SRichard Tran Mills }
289b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
290e8be1fc7SRichard Tran Mills 
2913849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2923849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
2933849ddb2SRichard Tran Mills {
2943849ddb2SRichard Tran Mills   PetscInt            i,j,k;
2953849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
2963849ddb2SRichard Tran Mills   PetscInt            nz;
2973849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
2983849ddb2SRichard Tran Mills   PetscScalar         *aa;
2993849ddb2SRichard Tran Mills   PetscErrorCode      ierr;
3001495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3013849ddb2SRichard Tran Mills   sparse_status_t     stat;
3023849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
3033849ddb2SRichard Tran Mills 
3043849ddb2SRichard Tran Mills   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
3053849ddb2SRichard Tran Mills 
3063849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
3073849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
3083849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
3093849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
3103849ddb2SRichard Tran Mills   }
3113849ddb2SRichard Tran Mills 
3123849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
3133849ddb2SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
3143849ddb2SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
3153849ddb2SRichard Tran Mills 
3163849ddb2SRichard Tran Mills   k = 0;
3173849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
3183849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
3193849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
3203849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
3213849ddb2SRichard Tran Mills       if (aa) {
3223849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
3233849ddb2SRichard Tran Mills       } else {
3243849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
3253849ddb2SRichard Tran Mills       }
3263849ddb2SRichard Tran Mills       k++;
3273849ddb2SRichard Tran Mills     }
3283849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
3293849ddb2SRichard Tran Mills   }
3303849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
3313849ddb2SRichard Tran Mills }
3323849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3333849ddb2SRichard Tran Mills 
3346e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3356e369cd5SRichard Tran Mills {
3366e369cd5SRichard Tran Mills   PetscErrorCode ierr;
3371495fedeSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3386e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3396e369cd5SRichard Tran Mills 
3406e369cd5SRichard Tran Mills   PetscFunctionBegin;
3416e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
3426e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
343580bdb30SBarry Smith   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
3446e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3455b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3466e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3475b49642aSRichard Tran Mills   }
3486e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3496e369cd5SRichard Tran Mills }
3506e369cd5SRichard Tran Mills 
3516e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3526e369cd5SRichard Tran Mills {
3536e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3546e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3555b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3566e369cd5SRichard Tran Mills 
3576e369cd5SRichard Tran Mills   PetscFunctionBegin;
3586e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3596e369cd5SRichard Tran Mills 
3606e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3616e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3626e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3636e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
364d96e85feSRichard Tran Mills    * a lot of code duplication. */
3656e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3666e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3676e369cd5SRichard Tran Mills 
3685b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3695b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3705b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3715b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3726e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3735b49642aSRichard Tran Mills   }
374df555b71SRichard Tran Mills 
3754a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3764a2a386eSRichard Tran Mills }
3774a2a386eSRichard Tran Mills 
378e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3794a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3804a2a386eSRichard Tran Mills {
3814a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3824a2a386eSRichard Tran Mills   const PetscScalar *x;
3834a2a386eSRichard Tran Mills   PetscScalar       *y;
3844a2a386eSRichard Tran Mills   const MatScalar   *aa;
3854a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3864a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
387db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
388db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
389db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3904a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
391db63039fSRichard Tran Mills   char              matdescra[6];
392db63039fSRichard Tran Mills 
3934a2a386eSRichard Tran Mills 
3944a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
395ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
396ff03dc53SRichard Tran Mills 
397ff03dc53SRichard Tran Mills   PetscFunctionBegin;
398db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
399db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
400ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
401ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
402ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
403ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
404ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
405ff03dc53SRichard Tran Mills 
406ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
407db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
408ff03dc53SRichard Tran Mills 
409ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
410ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
411ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
412ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
413ff03dc53SRichard Tran Mills }
4141950a7e7SRichard Tran Mills #endif
415ff03dc53SRichard Tran Mills 
416ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
417df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
418df555b71SRichard Tran Mills {
419df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
420df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
421df555b71SRichard Tran Mills   const PetscScalar *x;
422df555b71SRichard Tran Mills   PetscScalar       *y;
423df555b71SRichard Tran Mills   PetscErrorCode    ierr;
424df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
425551aa5c8SRichard Tran Mills   PetscObjectState  state;
426df555b71SRichard Tran Mills 
427df555b71SRichard Tran Mills   PetscFunctionBegin;
428df555b71SRichard Tran Mills 
42938987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
43038987b35SRichard Tran Mills   if(!a->nz) {
43138987b35SRichard Tran Mills     PetscInt i;
43238987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
43338987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
43438987b35SRichard Tran Mills     for (i=0; i<m; i++) {
43538987b35SRichard Tran Mills       y[i] = 0.0;
43638987b35SRichard Tran Mills     }
43738987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
43838987b35SRichard Tran Mills     PetscFunctionReturn(0);
43938987b35SRichard Tran Mills   }
440f36dfe3fSRichard Tran Mills 
441df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
442df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
443df555b71SRichard Tran Mills 
4443fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4453fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4463fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
447551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
448551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4493fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4503fa15762SRichard Tran Mills   }
4513fa15762SRichard Tran Mills 
452df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
453df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
454db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
455df555b71SRichard Tran Mills 
456df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
457df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
458df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
459df555b71SRichard Tran Mills   PetscFunctionReturn(0);
460df555b71SRichard Tran Mills }
461d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
462df555b71SRichard Tran Mills 
463e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
464ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
465ff03dc53SRichard Tran Mills {
466ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
467ff03dc53SRichard Tran Mills   const PetscScalar *x;
468ff03dc53SRichard Tran Mills   PetscScalar       *y;
469ff03dc53SRichard Tran Mills   const MatScalar   *aa;
470ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
471ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
472db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
473db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
474db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
475ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
476db63039fSRichard Tran Mills   char              matdescra[6];
477ff03dc53SRichard Tran Mills 
478ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
479ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4804a2a386eSRichard Tran Mills 
4814a2a386eSRichard Tran Mills   PetscFunctionBegin;
482969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
483969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4844a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4854a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4864a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4874a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4884a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4894a2a386eSRichard Tran Mills 
4904a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
491db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4924a2a386eSRichard Tran Mills 
4934a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4944a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4954a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4964a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4974a2a386eSRichard Tran Mills }
4981950a7e7SRichard Tran Mills #endif
4994a2a386eSRichard Tran Mills 
500ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
501df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
502df555b71SRichard Tran Mills {
503df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
504df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
505df555b71SRichard Tran Mills   const PetscScalar *x;
506df555b71SRichard Tran Mills   PetscScalar       *y;
507df555b71SRichard Tran Mills   PetscErrorCode    ierr;
5080632b357SRichard Tran Mills   sparse_status_t   stat;
509551aa5c8SRichard Tran Mills   PetscObjectState  state;
510df555b71SRichard Tran Mills 
511df555b71SRichard Tran Mills   PetscFunctionBegin;
512df555b71SRichard Tran Mills 
51338987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
51438987b35SRichard Tran Mills   if(!a->nz) {
51538987b35SRichard Tran Mills     PetscInt i;
51638987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
51738987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
51838987b35SRichard Tran Mills     for (i=0; i<n; i++) {
51938987b35SRichard Tran Mills       y[i] = 0.0;
52038987b35SRichard Tran Mills     }
52138987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
52238987b35SRichard Tran Mills     PetscFunctionReturn(0);
52338987b35SRichard Tran Mills   }
524f36dfe3fSRichard Tran Mills 
525df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527df555b71SRichard Tran Mills 
5283fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5293fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5303fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
531551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
532551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5333fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5343fa15762SRichard Tran Mills   }
5353fa15762SRichard Tran Mills 
536df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
537df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
538db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
539df555b71SRichard Tran Mills 
540df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
541df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
542df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
543df555b71SRichard Tran Mills   PetscFunctionReturn(0);
544df555b71SRichard Tran Mills }
545d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
546df555b71SRichard Tran Mills 
547e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
5484a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
5494a2a386eSRichard Tran Mills {
5504a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5514a2a386eSRichard Tran Mills   const PetscScalar *x;
5524a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5534a2a386eSRichard Tran Mills   const MatScalar   *aa;
5544a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5554a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
556db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5574a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5584a2a386eSRichard Tran Mills   PetscInt          i;
5594a2a386eSRichard Tran Mills 
560ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
561ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
562a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
563db63039fSRichard Tran Mills   PetscScalar       beta;
564a84739b8SRichard Tran Mills   char              matdescra[6];
565ff03dc53SRichard Tran Mills 
566ff03dc53SRichard Tran Mills   PetscFunctionBegin;
567a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
568a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
569a84739b8SRichard Tran Mills 
570ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
571ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
572ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
573ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
574ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
575ff03dc53SRichard Tran Mills 
576ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
577a84739b8SRichard Tran Mills   if (zz == yy) {
578a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
579db63039fSRichard Tran Mills     beta = 1.0;
580db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
581a84739b8SRichard Tran Mills   } else {
582db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
583db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
584db63039fSRichard Tran Mills     beta = 0.0;
585db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
586ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
587ff03dc53SRichard Tran Mills       z[i] += y[i];
588ff03dc53SRichard Tran Mills     }
589a84739b8SRichard Tran Mills   }
590ff03dc53SRichard Tran Mills 
591ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
592ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
593ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
594ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
595ff03dc53SRichard Tran Mills }
5961950a7e7SRichard Tran Mills #endif
597ff03dc53SRichard Tran Mills 
598ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
599df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
600df555b71SRichard Tran Mills {
601df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
602df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
603df555b71SRichard Tran Mills   const PetscScalar *x;
604df555b71SRichard Tran Mills   PetscScalar       *y,*z;
605df555b71SRichard Tran Mills   PetscErrorCode    ierr;
606df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
607df555b71SRichard Tran Mills   PetscInt          i;
608df555b71SRichard Tran Mills 
609df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
610df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
611551aa5c8SRichard Tran Mills   PetscObjectState  state;
612df555b71SRichard Tran Mills 
613df555b71SRichard Tran Mills   PetscFunctionBegin;
614df555b71SRichard Tran Mills 
61538987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
61638987b35SRichard Tran Mills   if(!a->nz) {
61738987b35SRichard Tran Mills     PetscInt i;
61838987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
61938987b35SRichard Tran Mills     for (i=0; i<m; i++) {
62038987b35SRichard Tran Mills       z[i] = y[i];
62138987b35SRichard Tran Mills     }
62238987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62338987b35SRichard Tran Mills     PetscFunctionReturn(0);
62438987b35SRichard Tran Mills   }
625df555b71SRichard Tran Mills 
626df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
627df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
628df555b71SRichard Tran Mills 
6293fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6303fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6313fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
632551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
633551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6343fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6353fa15762SRichard Tran Mills   }
6363fa15762SRichard Tran Mills 
637df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
638df555b71SRichard Tran Mills   if (zz == yy) {
639df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
640df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
641db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
642db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
643df555b71SRichard Tran Mills   } else {
644df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
645df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
646db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
647db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
648df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
649df555b71SRichard Tran Mills       z[i] += y[i];
650df555b71SRichard Tran Mills     }
651df555b71SRichard Tran Mills   }
652df555b71SRichard Tran Mills 
653df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
654df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
655df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
656df555b71SRichard Tran Mills   PetscFunctionReturn(0);
657df555b71SRichard Tran Mills }
658d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
659df555b71SRichard Tran Mills 
660e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
661ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
662ff03dc53SRichard Tran Mills {
663ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664ff03dc53SRichard Tran Mills   const PetscScalar *x;
665ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
666ff03dc53SRichard Tran Mills   const MatScalar   *aa;
667ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
668ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
669db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
670ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
671ff03dc53SRichard Tran Mills   PetscInt          i;
672ff03dc53SRichard Tran Mills 
673ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
674ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
675a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
676db63039fSRichard Tran Mills   PetscScalar       beta;
677a84739b8SRichard Tran Mills   char              matdescra[6];
6784a2a386eSRichard Tran Mills 
6794a2a386eSRichard Tran Mills   PetscFunctionBegin;
680a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
681a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
682a84739b8SRichard Tran Mills 
6834a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6844a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6854a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6864a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6874a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6884a2a386eSRichard Tran Mills 
6894a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
690a84739b8SRichard Tran Mills   if (zz == yy) {
691a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
692db63039fSRichard Tran Mills     beta = 1.0;
693969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
694a84739b8SRichard Tran Mills   } else {
695db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
696db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
697db63039fSRichard Tran Mills     beta = 0.0;
698db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
699969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
7004a2a386eSRichard Tran Mills       z[i] += y[i];
7014a2a386eSRichard Tran Mills     }
702a84739b8SRichard Tran Mills   }
7034a2a386eSRichard Tran Mills 
7044a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
7054a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7064a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
7074a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7084a2a386eSRichard Tran Mills }
7091950a7e7SRichard Tran Mills #endif
7104a2a386eSRichard Tran Mills 
711ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
712df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
713df555b71SRichard Tran Mills {
714df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
715df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
716df555b71SRichard Tran Mills   const PetscScalar *x;
717df555b71SRichard Tran Mills   PetscScalar       *y,*z;
718df555b71SRichard Tran Mills   PetscErrorCode    ierr;
719969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
720df555b71SRichard Tran Mills   PetscInt          i;
721551aa5c8SRichard Tran Mills   PetscObjectState  state;
722df555b71SRichard Tran Mills 
723df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
724df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
725df555b71SRichard Tran Mills 
726df555b71SRichard Tran Mills   PetscFunctionBegin;
727df555b71SRichard Tran Mills 
72838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
72938987b35SRichard Tran Mills   if(!a->nz) {
73038987b35SRichard Tran Mills     PetscInt i;
73138987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
73238987b35SRichard Tran Mills     for (i=0; i<n; i++) {
73338987b35SRichard Tran Mills       z[i] = y[i];
73438987b35SRichard Tran Mills     }
73538987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
73638987b35SRichard Tran Mills     PetscFunctionReturn(0);
73738987b35SRichard Tran Mills   }
738f36dfe3fSRichard Tran Mills 
739df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
740df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
741df555b71SRichard Tran Mills 
7423fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
7433fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
7443fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
745551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
746551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
7473fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
7483fa15762SRichard Tran Mills   }
7493fa15762SRichard Tran Mills 
750df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
751df555b71SRichard Tran Mills   if (zz == yy) {
752df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
753df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
754db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
755db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
756df555b71SRichard Tran Mills   } else {
757df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
758df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
759db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
760db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
761969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
762df555b71SRichard Tran Mills       z[i] += y[i];
763df555b71SRichard Tran Mills     }
764df555b71SRichard Tran Mills   }
765df555b71SRichard Tran Mills 
766df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
767df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
768df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
769df555b71SRichard Tran Mills   PetscFunctionReturn(0);
770df555b71SRichard Tran Mills }
771d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
772df555b71SRichard Tran Mills 
773190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
7748a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
775190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
776431879ecSRichard Tran Mills {
7771495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
778431879ecSRichard Tran Mills   sparse_matrix_t     csrA,csrB,csrC;
779190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
780431879ecSRichard Tran Mills   PetscErrorCode      ierr;
781431879ecSRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
782431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
783431879ecSRichard Tran Mills   PetscObjectState    state;
784431879ecSRichard Tran Mills 
785431879ecSRichard Tran Mills   PetscFunctionBegin;
786190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
787190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
788190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
789190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
790190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
791190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
792190ae7a4SRichard Tran Mills 
793431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
794431879ecSRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
795431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
796431879ecSRichard Tran Mills   }
797431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
798431879ecSRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
799431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
800431879ecSRichard Tran Mills   }
801431879ecSRichard Tran Mills   csrA = a->csrA;
802431879ecSRichard Tran Mills   csrB = b->csrA;
803431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
804431879ecSRichard Tran Mills 
805190ae7a4SRichard Tran Mills   if (csrA && csrB) {
806190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
807db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
808190ae7a4SRichard Tran Mills   } else {
809190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
810190ae7a4SRichard Tran Mills   }
811190ae7a4SRichard Tran Mills 
812190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
813431879ecSRichard Tran Mills 
814431879ecSRichard Tran Mills   PetscFunctionReturn(0);
815431879ecSRichard Tran Mills }
816431879ecSRichard Tran Mills 
817190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
818e8be1fc7SRichard Tran Mills {
8191495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
820e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
821e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
822e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
823e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
824e8be1fc7SRichard Tran Mills   PetscObjectState    state;
825e8be1fc7SRichard Tran Mills 
826e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
827e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
828e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
829e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
830e8be1fc7SRichard Tran Mills   }
831e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
832e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
833e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
834e8be1fc7SRichard Tran Mills   }
835e8be1fc7SRichard Tran Mills   csrA = a->csrA;
836e8be1fc7SRichard Tran Mills   csrB = b->csrA;
837e8be1fc7SRichard Tran Mills   csrC = c->csrA;
838e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
839e8be1fc7SRichard Tran Mills 
840190ae7a4SRichard Tran Mills   if (csrA && csrB) {
841190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
842db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
843190ae7a4SRichard Tran Mills   } else {
844190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
845190ae7a4SRichard Tran Mills   }
846e8be1fc7SRichard Tran Mills 
847e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8484f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
849e8be1fc7SRichard Tran Mills 
850e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
851e8be1fc7SRichard Tran Mills }
852e8be1fc7SRichard Tran Mills 
853190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
8544f53af40SRichard Tran Mills {
855190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
856190ae7a4SRichard Tran Mills 
857190ae7a4SRichard Tran Mills   PetscFunctionBegin;
858190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
859190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
860190ae7a4SRichard Tran Mills }
861190ae7a4SRichard Tran Mills 
862190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
863190ae7a4SRichard Tran Mills {
864190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
865190ae7a4SRichard Tran Mills 
866190ae7a4SRichard Tran Mills   PetscFunctionBegin;
867190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
868190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
869190ae7a4SRichard Tran Mills }
870190ae7a4SRichard Tran Mills 
871190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
872190ae7a4SRichard Tran Mills {
873190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
874190ae7a4SRichard Tran Mills 
875190ae7a4SRichard Tran Mills   PetscFunctionBegin;
876190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
877190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
878190ae7a4SRichard Tran Mills }
879190ae7a4SRichard Tran Mills 
880190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
881190ae7a4SRichard Tran Mills {
882190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
883190ae7a4SRichard Tran Mills 
884190ae7a4SRichard Tran Mills   PetscFunctionBegin;
885190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
886190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
887190ae7a4SRichard Tran Mills }
888190ae7a4SRichard Tran Mills 
889190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
890190ae7a4SRichard Tran Mills {
891190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
892190ae7a4SRichard Tran Mills 
893190ae7a4SRichard Tran Mills   PetscFunctionBegin;
894190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
895190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
896190ae7a4SRichard Tran Mills }
897190ae7a4SRichard Tran Mills 
898190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
899190ae7a4SRichard Tran Mills {
900190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
901190ae7a4SRichard Tran Mills 
902190ae7a4SRichard Tran Mills   PetscFunctionBegin;
903190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
904190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
905190ae7a4SRichard Tran Mills }
906190ae7a4SRichard Tran Mills 
907190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
908190ae7a4SRichard Tran Mills {
909190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
910190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
911190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
912190ae7a4SRichard Tran Mills 
913190ae7a4SRichard Tran Mills   PetscFunctionBegin;
914190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
915190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
916190ae7a4SRichard Tran Mills }
917190ae7a4SRichard Tran Mills 
918190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
919190ae7a4SRichard Tran Mills {
920190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
921190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
922190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
923190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
924190ae7a4SRichard Tran Mills 
925190ae7a4SRichard Tran Mills   PetscFunctionBegin;
926190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
927190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
928190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
929190ae7a4SRichard Tran Mills }
930190ae7a4SRichard Tran Mills 
93149ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
932190ae7a4SRichard Tran Mills {
933190ae7a4SRichard Tran Mills   Mat                 Ct;
934190ae7a4SRichard Tran Mills   Vec                 zeros;
9351495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
9364f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
9374f53af40SRichard Tran Mills   PetscBool           set, flag;
9384f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
939b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
9404f53af40SRichard Tran Mills   PetscObjectState    state;
9414f53af40SRichard Tran Mills   PetscErrorCode      ierr;
9424f53af40SRichard Tran Mills 
9434f53af40SRichard Tran Mills   PetscFunctionBegin;
9444f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
94537f0d54fSRichard Tran Mills   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
9464f53af40SRichard Tran Mills 
9474f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
9484f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
9494f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
9504f53af40SRichard Tran Mills   }
9514f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
9524f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
9534f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
9544f53af40SRichard Tran Mills   }
9554f53af40SRichard Tran Mills   csrA = a->csrA;
9564f53af40SRichard Tran Mills   csrP = p->csrA;
9574f53af40SRichard Tran Mills   csrC = c->csrA;
958b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
959190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
960b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
9614f53af40SRichard Tran Mills 
962f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
963b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
964db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
9654f53af40SRichard Tran Mills 
966190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
967190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
968190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
969190ae7a4SRichard Tran Mills    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
970190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
971190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
972190ae7a4SRichard Tran Mills    * the full matrix. */
9734f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
974190ae7a4SRichard Tran Mills   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
975190ae7a4SRichard Tran Mills   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
976190ae7a4SRichard Tran Mills   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
977190ae7a4SRichard Tran Mills   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
978190ae7a4SRichard Tran Mills   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
979190ae7a4SRichard Tran Mills   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
980190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
981190ae7a4SRichard Tran Mills   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
9821495fedeSRichard Tran Mills   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
983190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
984190ae7a4SRichard Tran Mills   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
985190ae7a4SRichard Tran Mills   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
9864f53af40SRichard Tran Mills   PetscFunctionReturn(0);
9874f53af40SRichard Tran Mills }
988190ae7a4SRichard Tran Mills 
989190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
990190ae7a4SRichard Tran Mills {
991190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
992190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
9931495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
994190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
995190ae7a4SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
996190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
997190ae7a4SRichard Tran Mills   PetscObjectState    state;
998190ae7a4SRichard Tran Mills   PetscErrorCode      ierr;
999190ae7a4SRichard Tran Mills 
1000190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1001190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1002190ae7a4SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
1003190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
1004190ae7a4SRichard Tran Mills   }
1005190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1006190ae7a4SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
1007190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
1008190ae7a4SRichard Tran Mills   }
1009190ae7a4SRichard Tran Mills   csrA = a->csrA;
1010190ae7a4SRichard Tran Mills   csrP = p->csrA;
1011190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1012190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1013190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1014190ae7a4SRichard Tran Mills 
1015190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1016190ae7a4SRichard Tran Mills   if (csrP && csrA) {
1017190ae7a4SRichard Tran Mills     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1018db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1019190ae7a4SRichard Tran Mills   } else {
1020190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
1021190ae7a4SRichard Tran Mills   }
1022190ae7a4SRichard Tran Mills 
1023190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1024190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
102549ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
102649ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
102749ba5396SRichard Tran Mills    * is guaranteed. */
1028190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1029190ae7a4SRichard Tran Mills 
1030190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
1031190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1032190ae7a4SRichard Tran Mills }
1033190ae7a4SRichard Tran Mills 
1034190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1035190ae7a4SRichard Tran Mills {
1036190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1037190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
1038190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1039190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1040190ae7a4SRichard Tran Mills }
1041190ae7a4SRichard Tran Mills 
1042190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1043190ae7a4SRichard Tran Mills {
1044190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1045190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1046190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1047190ae7a4SRichard Tran Mills }
1048190ae7a4SRichard Tran Mills 
1049190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1050190ae7a4SRichard Tran Mills {
1051190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1052190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1053190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1054190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1055190ae7a4SRichard Tran Mills }
1056190ae7a4SRichard Tran Mills 
1057190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1058190ae7a4SRichard Tran Mills {
1059190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1060190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1061190ae7a4SRichard Tran Mills   Mat            A = product->A;
1062190ae7a4SRichard Tran Mills   PetscBool      set, flag;
1063190ae7a4SRichard Tran Mills 
1064190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1065190ae7a4SRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
1066190ae7a4SRichard Tran Mills   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1067190ae7a4SRichard Tran Mills    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1068190ae7a4SRichard Tran Mills    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1069190ae7a4SRichard Tran Mills    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1070190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL;
1071190ae7a4SRichard Tran Mills #else
1072190ae7a4SRichard Tran Mills   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1073190ae7a4SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1074190ae7a4SRichard Tran Mills   if (set && flag) {
1075190ae7a4SRichard Tran Mills     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1076190ae7a4SRichard Tran Mills     PetscFunctionReturn(0);
1077190ae7a4SRichard Tran Mills   } else {
1078190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1079190ae7a4SRichard Tran Mills   }
10801495fedeSRichard Tran Mills   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1081190ae7a4SRichard Tran Mills    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1082190ae7a4SRichard Tran Mills #endif
1083190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1084190ae7a4SRichard Tran Mills }
1085190ae7a4SRichard Tran Mills 
1086190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1087190ae7a4SRichard Tran Mills {
1088190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1089190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1090190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1091190ae7a4SRichard Tran Mills }
1092190ae7a4SRichard Tran Mills 
1093190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1094190ae7a4SRichard Tran Mills {
1095190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1096190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1097190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1098190ae7a4SRichard Tran Mills }
1099190ae7a4SRichard Tran Mills 
1100190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1101190ae7a4SRichard Tran Mills {
1102190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1103190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1104190ae7a4SRichard Tran Mills 
1105190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1106190ae7a4SRichard Tran Mills   switch (product->type) {
1107190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
1108190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1109190ae7a4SRichard Tran Mills     break;
1110190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
1111190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1112190ae7a4SRichard Tran Mills     break;
1113190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
1114190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1115190ae7a4SRichard Tran Mills     break;
1116190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
1117190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1118190ae7a4SRichard Tran Mills     break;
1119190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
1120190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1121190ae7a4SRichard Tran Mills     break;
1122190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
1123190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1124190ae7a4SRichard Tran Mills     break;
1125190ae7a4SRichard Tran Mills   default:
1126190ae7a4SRichard Tran Mills     break;
1127190ae7a4SRichard Tran Mills   }
1128190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1129190ae7a4SRichard Tran Mills }
1130431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1131190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
11324f53af40SRichard Tran Mills 
11334a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1134510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
11354a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
11364a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
11374a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
11384a2a386eSRichard Tran Mills {
11394a2a386eSRichard Tran Mills   PetscErrorCode ierr;
11404a2a386eSRichard Tran Mills   Mat            B = *newmat;
11414a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1142c9d46305SRichard Tran Mills   PetscBool      set;
1143e9c94282SRichard Tran Mills   PetscBool      sametype;
11444a2a386eSRichard Tran Mills 
11454a2a386eSRichard Tran Mills   PetscFunctionBegin;
11464a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
11474a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
11484a2a386eSRichard Tran Mills   }
11494a2a386eSRichard Tran Mills 
1150e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1151e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1152e9c94282SRichard Tran Mills 
11534a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
11544a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
11554a2a386eSRichard Tran Mills 
1156df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1157969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
11584a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
11594a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
11604a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1161c9d46305SRichard Tran Mills 
11624abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1163ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1164d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1165a8327b06SKarl Rupp #else
1166d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1167d995685eSRichard Tran Mills #endif
11685b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
11694abfa3b3SRichard Tran Mills 
11704abfa3b3SRichard Tran Mills   /* Parse command line options. */
1171c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
117248292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
117348292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1174c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1175ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1176d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1177d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1178d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1179d995685eSRichard Tran Mills   }
1180d995685eSRichard Tran Mills #endif
1181c9d46305SRichard Tran Mills 
1182ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1183df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1184969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1185df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1186969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
11878a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1188190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1189190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1190190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1191190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1192190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
119449ba5396SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1195190ae7a4SRichard Tran Mills #   else
1196190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
11974f53af40SRichard Tran Mills #   endif
1198e8be1fc7SRichard Tran Mills # endif
11991950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
12001950a7e7SRichard Tran Mills 
1201213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1202213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1203213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1204213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1205213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
12061950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
12074a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1208969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
12094a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1210969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1211c9d46305SRichard Tran Mills   }
12121950a7e7SRichard Tran Mills #endif
12134a2a386eSRichard Tran Mills 
12144a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
12154a2a386eSRichard Tran Mills 
1216190ae7a4SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1217190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1218190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1219190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1220190ae7a4SRichard Tran Mills #endif
1221190ae7a4SRichard Tran Mills #endif
1222190ae7a4SRichard Tran Mills   }
1223190ae7a4SRichard Tran Mills 
12244a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
12254a2a386eSRichard Tran Mills   *newmat = B;
12264a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12274a2a386eSRichard Tran Mills }
12284a2a386eSRichard Tran Mills 
12294a2a386eSRichard Tran Mills /*@C
12304a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
12314a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
12324a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
123390147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
123490147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1235597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1236597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1237597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
123890147e49SRichard Tran Mills 
1239d083f849SBarry Smith    Collective
12404a2a386eSRichard Tran Mills 
12414a2a386eSRichard Tran Mills    Input Parameters:
12424a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
12434a2a386eSRichard Tran Mills .  m - number of rows
12444a2a386eSRichard Tran Mills .  n - number of columns
12454a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
12464a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
12474a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
12484a2a386eSRichard Tran Mills 
12494a2a386eSRichard Tran Mills    Output Parameter:
12504a2a386eSRichard Tran Mills .  A - the matrix
12514a2a386eSRichard Tran Mills 
125290147e49SRichard Tran Mills    Options Database Keys:
125366b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
125466b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
125590147e49SRichard Tran Mills 
12564a2a386eSRichard Tran Mills    Notes:
12574a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
12584a2a386eSRichard Tran Mills 
12594a2a386eSRichard Tran Mills    Level: intermediate
12604a2a386eSRichard Tran Mills 
12614a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
12624a2a386eSRichard Tran Mills @*/
12634a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
12644a2a386eSRichard Tran Mills {
12654a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12664a2a386eSRichard Tran Mills 
12674a2a386eSRichard Tran Mills   PetscFunctionBegin;
12684a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
12694a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
12704a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
12714a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
12724a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12734a2a386eSRichard Tran Mills }
12744a2a386eSRichard Tran Mills 
12754a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
12764a2a386eSRichard Tran Mills {
12774a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12784a2a386eSRichard Tran Mills 
12794a2a386eSRichard Tran Mills   PetscFunctionBegin;
12804a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
12814a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
12824a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12834a2a386eSRichard Tran Mills }
1284