xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 3849ddb27598efbe8b32382fcfe3044208563945)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35c1d5218aSRichard Tran Mills #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
554f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
584222ddf1SHong Zhang 
59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60190ae7a4SRichard Tran Mills   if (!aijmkl->no_SpMV2) {
61190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63190ae7a4SRichard Tran Mills #endif
64190ae7a4SRichard Tran Mills   }
65190ae7a4SRichard Tran Mills 
664abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
68e9c94282SRichard Tran Mills    * the spptr pointer. */
69a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70a8327b06SKarl Rupp 
714abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
720632b357SRichard Tran Mills     sparse_status_t stat;
734abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
749c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
754abfa3b3SRichard Tran Mills   }
766718818eSStefano Zampini #endif
77e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
804a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
814a2a386eSRichard Tran Mills 
824a2a386eSRichard Tran Mills   *newmat = B;
834a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
844a2a386eSRichard Tran Mills }
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
874a2a386eSRichard Tran Mills {
884a2a386eSRichard Tran Mills   PetscErrorCode ierr;
894a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
904a2a386eSRichard Tran Mills 
914a2a386eSRichard Tran Mills   PetscFunctionBegin;
92e9c94282SRichard Tran Mills 
93e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
94e9c94282SRichard Tran Mills    * spptr pointer. */
95e9c94282SRichard Tran Mills   if (aijmkl) {
964a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
97ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
984abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
994abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1004abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
1019c46acdfSRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1024abfa3b3SRichard Tran Mills     }
1034abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1044a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
105e9c94282SRichard Tran Mills   }
1064a2a386eSRichard Tran Mills 
1074a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1084a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1094a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1104a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1114a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1124a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1134a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1144a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1154a2a386eSRichard Tran Mills }
1164a2a386eSRichard Tran Mills 
117190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1185b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1195b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1205b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1215b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1225b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1235b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1246e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1254a2a386eSRichard Tran Mills {
126ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1276e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1286e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1296e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
13045fbe478SRichard Tran Mills   PetscFunctionBegin;
1316e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1326e369cd5SRichard Tran Mills #else
133a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
134a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
135a8327b06SKarl Rupp   PetscInt         m,n;
136a8327b06SKarl Rupp   MatScalar        *aa;
137a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1386e369cd5SRichard Tran Mills   sparse_status_t  stat;
139551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1404a2a386eSRichard Tran Mills 
141a8327b06SKarl Rupp   PetscFunctionBegin;
142e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
143e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
144e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
145e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
146e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1476e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1484d51fa23SRichard Tran Mills #endif
1496e369cd5SRichard Tran Mills 
1500632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1510632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1520632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1530632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1549c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1550632b357SRichard Tran Mills   }
1568d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1576e369cd5SRichard Tran Mills 
158c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
159df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
160df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
161df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
16258678438SRichard Tran Mills   m = A->rmap->n;
16358678438SRichard Tran Mills   n = A->cmap->n;
164df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
16746cdef40SRichard Tran Mills   if ((a->nz!=0) && aa && !(A->structure_only)) {
1688d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1698d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17058678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
172df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
173e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
174df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
1761950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
177df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
178e8be1fc7SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
1791950a7e7SRichard Tran Mills     }
1804abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
181e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
182190ae7a4SRichard Tran Mills   } else {
183190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
184c9d46305SRichard Tran Mills   }
1856e369cd5SRichard Tran Mills 
1866e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
187d995685eSRichard Tran Mills #endif
1886e369cd5SRichard Tran Mills }
1896e369cd5SRichard Tran Mills 
190ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
192190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
19319afcda9SRichard Tran Mills {
19419afcda9SRichard Tran Mills   PetscErrorCode      ierr;
19519afcda9SRichard Tran Mills   sparse_status_t     stat;
19619afcda9SRichard Tran Mills   sparse_index_base_t indexing;
197190ae7a4SRichard Tran Mills   PetscInt            m,n;
19845fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
19919afcda9SRichard Tran Mills   MatScalar           *aa;
20019afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
20119afcda9SRichard Tran Mills 
202190ae7a4SRichard Tran Mills   if (csrA) {
20345fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204190ae7a4SRichard Tran Mills     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
2059c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206190ae7a4SRichard Tran Mills     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
207190ae7a4SRichard Tran Mills   } else {
208190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
209190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
210aab60f1bSRichard Tran Mills   }
211190ae7a4SRichard Tran Mills 
21219afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
21345fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
217aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218190ae7a4SRichard Tran Mills   if (csrA) {
219190ae7a4SRichard Tran Mills     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
220190ae7a4SRichard Tran Mills   } else {
221190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
222190ae7a4SRichard Tran Mills     ierr = MatSetUp(A);CHKERRQ(ierr);
223190ae7a4SRichard Tran Mills     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224190ae7a4SRichard Tran Mills     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225190ae7a4SRichard Tran Mills   }
22619afcda9SRichard Tran Mills 
22719afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22819afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22919afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2306c87cf42SRichard Tran Mills 
23119afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
23219afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2336c87cf42SRichard Tran Mills 
23419afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
23519afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23619afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
237f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
238f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
239f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
240190ae7a4SRichard Tran Mills   if (csrA) {
24119afcda9SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
24251539a68SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
24319afcda9SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
24451539a68SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
2451950a7e7SRichard Tran Mills   }
246e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
24719afcda9SRichard Tran Mills 
24819afcda9SRichard Tran Mills   PetscFunctionReturn(0);
24919afcda9SRichard Tran Mills }
25019afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
25119afcda9SRichard Tran Mills 
252190ae7a4SRichard Tran Mills 
253e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
254e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
255e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
256ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
257190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
258e8be1fc7SRichard Tran Mills {
259e8be1fc7SRichard Tran Mills   PetscInt            i;
260e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
261e8be1fc7SRichard Tran Mills   PetscInt            nz;
262e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
263e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
264e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
265e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
266e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
267e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
268e8be1fc7SRichard Tran Mills 
269e8be1fc7SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
270e8be1fc7SRichard Tran Mills 
271190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
272190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
273190ae7a4SRichard Tran Mills 
274e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
276e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
277e8be1fc7SRichard Tran Mills 
278e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
279e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
280e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
281e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
282e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
283e8be1fc7SRichard Tran Mills   }
284e8be1fc7SRichard Tran Mills 
285e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
286e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287e8be1fc7SRichard Tran Mills 
288e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
289e995cf24SRichard Tran Mills   /* We mark our matrix as having a valid, optimized MKL handle.
290e995cf24SRichard Tran Mills    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
291e995cf24SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
292e995cf24SRichard Tran Mills 
293e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
294e8be1fc7SRichard Tran Mills }
295e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
296e8be1fc7SRichard Tran Mills 
297*3849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
298*3849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
299*3849ddb2SRichard Tran Mills {
300*3849ddb2SRichard Tran Mills   PetscInt            i,j,k;
301*3849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
302*3849ddb2SRichard Tran Mills   PetscInt            nz;
303*3849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
304*3849ddb2SRichard Tran Mills   PetscScalar         *aa;
305*3849ddb2SRichard Tran Mills   PetscErrorCode      ierr;
306*3849ddb2SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
307*3849ddb2SRichard Tran Mills   sparse_status_t     stat;
308*3849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
309*3849ddb2SRichard Tran Mills 
310*3849ddb2SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
311*3849ddb2SRichard Tran Mills 
312*3849ddb2SRichard Tran Mills   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
313*3849ddb2SRichard Tran Mills 
314*3849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
315*3849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
316*3849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
317*3849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
318*3849ddb2SRichard Tran Mills   }
319*3849ddb2SRichard Tran Mills 
320*3849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
321*3849ddb2SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
322*3849ddb2SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
323*3849ddb2SRichard Tran Mills 
324*3849ddb2SRichard Tran Mills   k = 0;
325*3849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
326*3849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
327*3849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
328*3849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
329*3849ddb2SRichard Tran Mills       if (aa) {
330*3849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
331*3849ddb2SRichard Tran Mills       } else {
332*3849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
333*3849ddb2SRichard Tran Mills       }
334*3849ddb2SRichard Tran Mills       k++;
335*3849ddb2SRichard Tran Mills     }
336*3849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
337*3849ddb2SRichard Tran Mills   }
338*3849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
339*3849ddb2SRichard Tran Mills }
340*3849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
341*3849ddb2SRichard Tran Mills 
3426e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3436e369cd5SRichard Tran Mills {
3446e369cd5SRichard Tran Mills   PetscErrorCode ierr;
3456e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
3466e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3476e369cd5SRichard Tran Mills 
3486e369cd5SRichard Tran Mills   PetscFunctionBegin;
3496e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
3506e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
3516e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
352580bdb30SBarry Smith   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
3536e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3545b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3556e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3565b49642aSRichard Tran Mills   }
3576e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3586e369cd5SRichard Tran Mills }
3596e369cd5SRichard Tran Mills 
3606e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3616e369cd5SRichard Tran Mills {
3626e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3636e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3645b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3656e369cd5SRichard Tran Mills 
3666e369cd5SRichard Tran Mills   PetscFunctionBegin;
3676e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3686e369cd5SRichard Tran Mills 
3696e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3706e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3716e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3726e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
373d96e85feSRichard Tran Mills    * a lot of code duplication. */
3746e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3756e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3766e369cd5SRichard Tran Mills 
3775b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3785b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3795b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
3805b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3816e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3825b49642aSRichard Tran Mills   }
383df555b71SRichard Tran Mills 
3844a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3854a2a386eSRichard Tran Mills }
3864a2a386eSRichard Tran Mills 
387e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3884a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3894a2a386eSRichard Tran Mills {
3904a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3914a2a386eSRichard Tran Mills   const PetscScalar *x;
3924a2a386eSRichard Tran Mills   PetscScalar       *y;
3934a2a386eSRichard Tran Mills   const MatScalar   *aa;
3944a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3954a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
396db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
397db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
398db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3994a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
400db63039fSRichard Tran Mills   char              matdescra[6];
401db63039fSRichard Tran Mills 
4024a2a386eSRichard Tran Mills 
4034a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
404ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
405ff03dc53SRichard Tran Mills 
406ff03dc53SRichard Tran Mills   PetscFunctionBegin;
407db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
408db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
409ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
410ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
411ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
412ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
413ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
414ff03dc53SRichard Tran Mills 
415ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
416db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
417ff03dc53SRichard Tran Mills 
418ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
419ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
420ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
422ff03dc53SRichard Tran Mills }
4231950a7e7SRichard Tran Mills #endif
424ff03dc53SRichard Tran Mills 
425ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
426df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
427df555b71SRichard Tran Mills {
428df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
429df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
430df555b71SRichard Tran Mills   const PetscScalar *x;
431df555b71SRichard Tran Mills   PetscScalar       *y;
432df555b71SRichard Tran Mills   PetscErrorCode    ierr;
433df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
434551aa5c8SRichard Tran Mills   PetscObjectState  state;
435df555b71SRichard Tran Mills 
436df555b71SRichard Tran Mills   PetscFunctionBegin;
437df555b71SRichard Tran Mills 
43838987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
43938987b35SRichard Tran Mills   if(!a->nz) {
44038987b35SRichard Tran Mills     PetscInt i;
44138987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
44238987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
44338987b35SRichard Tran Mills     for (i=0; i<m; i++) {
44438987b35SRichard Tran Mills       y[i] = 0.0;
44538987b35SRichard Tran Mills     }
44638987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
44738987b35SRichard Tran Mills     PetscFunctionReturn(0);
44838987b35SRichard Tran Mills   }
449f36dfe3fSRichard Tran Mills 
450df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
451df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
452df555b71SRichard Tran Mills 
4533fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4543fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4553fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
456551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
457551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4583fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4593fa15762SRichard Tran Mills   }
4603fa15762SRichard Tran Mills 
461df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
462df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
4639c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
464df555b71SRichard Tran Mills 
465df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
466df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
467df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
468df555b71SRichard Tran Mills   PetscFunctionReturn(0);
469df555b71SRichard Tran Mills }
470d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
471df555b71SRichard Tran Mills 
472e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
473ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
474ff03dc53SRichard Tran Mills {
475ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
476ff03dc53SRichard Tran Mills   const PetscScalar *x;
477ff03dc53SRichard Tran Mills   PetscScalar       *y;
478ff03dc53SRichard Tran Mills   const MatScalar   *aa;
479ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
480ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
481db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
482db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
483db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
484ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
485db63039fSRichard Tran Mills   char              matdescra[6];
486ff03dc53SRichard Tran Mills 
487ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
488ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4894a2a386eSRichard Tran Mills 
4904a2a386eSRichard Tran Mills   PetscFunctionBegin;
491969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
492969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4934a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4944a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4954a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4964a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4974a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4984a2a386eSRichard Tran Mills 
4994a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
500db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
5014a2a386eSRichard Tran Mills 
5024a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
5034a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5044a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
5054a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
5064a2a386eSRichard Tran Mills }
5071950a7e7SRichard Tran Mills #endif
5084a2a386eSRichard Tran Mills 
509ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
510df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
511df555b71SRichard Tran Mills {
512df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
513df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
514df555b71SRichard Tran Mills   const PetscScalar *x;
515df555b71SRichard Tran Mills   PetscScalar       *y;
516df555b71SRichard Tran Mills   PetscErrorCode    ierr;
5170632b357SRichard Tran Mills   sparse_status_t   stat;
518551aa5c8SRichard Tran Mills   PetscObjectState  state;
519df555b71SRichard Tran Mills 
520df555b71SRichard Tran Mills   PetscFunctionBegin;
521df555b71SRichard Tran Mills 
52238987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
52338987b35SRichard Tran Mills   if(!a->nz) {
52438987b35SRichard Tran Mills     PetscInt i;
52538987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
52638987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
52738987b35SRichard Tran Mills     for (i=0; i<n; i++) {
52838987b35SRichard Tran Mills       y[i] = 0.0;
52938987b35SRichard Tran Mills     }
53038987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
53138987b35SRichard Tran Mills     PetscFunctionReturn(0);
53238987b35SRichard Tran Mills   }
533f36dfe3fSRichard Tran Mills 
534df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
535df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
536df555b71SRichard Tran Mills 
5373fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5383fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5393fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
540551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
541551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5423fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5433fa15762SRichard Tran Mills   }
5443fa15762SRichard Tran Mills 
545df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
546df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
5479c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
548df555b71SRichard Tran Mills 
549df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
550df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
551df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
552df555b71SRichard Tran Mills   PetscFunctionReturn(0);
553df555b71SRichard Tran Mills }
554d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
555df555b71SRichard Tran Mills 
556e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
5574a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
5584a2a386eSRichard Tran Mills {
5594a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5604a2a386eSRichard Tran Mills   const PetscScalar *x;
5614a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5624a2a386eSRichard Tran Mills   const MatScalar   *aa;
5634a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5644a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
565db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5664a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5674a2a386eSRichard Tran Mills   PetscInt          i;
5684a2a386eSRichard Tran Mills 
569ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
570ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
571a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
572db63039fSRichard Tran Mills   PetscScalar       beta;
573a84739b8SRichard Tran Mills   char              matdescra[6];
574ff03dc53SRichard Tran Mills 
575ff03dc53SRichard Tran Mills   PetscFunctionBegin;
576a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
577a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
578a84739b8SRichard Tran Mills 
579ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
580ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
581ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
582ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
583ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
584ff03dc53SRichard Tran Mills 
585ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
586a84739b8SRichard Tran Mills   if (zz == yy) {
587a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
588db63039fSRichard Tran Mills     beta = 1.0;
589db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
590a84739b8SRichard Tran Mills   } else {
591db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
592db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
593db63039fSRichard Tran Mills     beta = 0.0;
594db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
595ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
596ff03dc53SRichard Tran Mills       z[i] += y[i];
597ff03dc53SRichard Tran Mills     }
598a84739b8SRichard Tran Mills   }
599ff03dc53SRichard Tran Mills 
600ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
601ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
602ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
603ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
604ff03dc53SRichard Tran Mills }
6051950a7e7SRichard Tran Mills #endif
606ff03dc53SRichard Tran Mills 
607ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
608df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
609df555b71SRichard Tran Mills {
610df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
611df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
612df555b71SRichard Tran Mills   const PetscScalar *x;
613df555b71SRichard Tran Mills   PetscScalar       *y,*z;
614df555b71SRichard Tran Mills   PetscErrorCode    ierr;
615df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
616df555b71SRichard Tran Mills   PetscInt          i;
617df555b71SRichard Tran Mills 
618df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
619df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
620551aa5c8SRichard Tran Mills   PetscObjectState  state;
621df555b71SRichard Tran Mills 
622df555b71SRichard Tran Mills   PetscFunctionBegin;
623df555b71SRichard Tran Mills 
62438987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
62538987b35SRichard Tran Mills   if(!a->nz) {
62638987b35SRichard Tran Mills     PetscInt i;
62738987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62838987b35SRichard Tran Mills     for (i=0; i<m; i++) {
62938987b35SRichard Tran Mills       z[i] = y[i];
63038987b35SRichard Tran Mills     }
63138987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
63238987b35SRichard Tran Mills     PetscFunctionReturn(0);
63338987b35SRichard Tran Mills   }
634df555b71SRichard Tran Mills 
635df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
636df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
637df555b71SRichard Tran Mills 
6383fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6393fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6403fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
641551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
642551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6433fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6443fa15762SRichard Tran Mills   }
6453fa15762SRichard Tran Mills 
646df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
647df555b71SRichard Tran Mills   if (zz == yy) {
648df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
649df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
650db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
6519c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
652df555b71SRichard Tran Mills   } else {
653df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
654df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
655db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
6569c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
657df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
658df555b71SRichard Tran Mills       z[i] += y[i];
659df555b71SRichard Tran Mills     }
660df555b71SRichard Tran Mills   }
661df555b71SRichard Tran Mills 
662df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
663df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
664df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
665df555b71SRichard Tran Mills   PetscFunctionReturn(0);
666df555b71SRichard Tran Mills }
667d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668df555b71SRichard Tran Mills 
669e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
670ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
671ff03dc53SRichard Tran Mills {
672ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
673ff03dc53SRichard Tran Mills   const PetscScalar *x;
674ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
675ff03dc53SRichard Tran Mills   const MatScalar   *aa;
676ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
677ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
678db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
679ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
680ff03dc53SRichard Tran Mills   PetscInt          i;
681ff03dc53SRichard Tran Mills 
682ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
683ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
684a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
685db63039fSRichard Tran Mills   PetscScalar       beta;
686a84739b8SRichard Tran Mills   char              matdescra[6];
6874a2a386eSRichard Tran Mills 
6884a2a386eSRichard Tran Mills   PetscFunctionBegin;
689a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
690a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
691a84739b8SRichard Tran Mills 
6924a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6934a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6944a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6954a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6964a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6974a2a386eSRichard Tran Mills 
6984a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
699a84739b8SRichard Tran Mills   if (zz == yy) {
700a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
701db63039fSRichard Tran Mills     beta = 1.0;
702969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
703a84739b8SRichard Tran Mills   } else {
704db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
705db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
706db63039fSRichard Tran Mills     beta = 0.0;
707db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
708969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
7094a2a386eSRichard Tran Mills       z[i] += y[i];
7104a2a386eSRichard Tran Mills     }
711a84739b8SRichard Tran Mills   }
7124a2a386eSRichard Tran Mills 
7134a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
7144a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7154a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
7164a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7174a2a386eSRichard Tran Mills }
7181950a7e7SRichard Tran Mills #endif
7194a2a386eSRichard Tran Mills 
720ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
721df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
722df555b71SRichard Tran Mills {
723df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
724df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
725df555b71SRichard Tran Mills   const PetscScalar *x;
726df555b71SRichard Tran Mills   PetscScalar       *y,*z;
727df555b71SRichard Tran Mills   PetscErrorCode    ierr;
728969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
729df555b71SRichard Tran Mills   PetscInt          i;
730551aa5c8SRichard Tran Mills   PetscObjectState  state;
731df555b71SRichard Tran Mills 
732df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
733df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
734df555b71SRichard Tran Mills 
735df555b71SRichard Tran Mills   PetscFunctionBegin;
736df555b71SRichard Tran Mills 
73738987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
73838987b35SRichard Tran Mills   if(!a->nz) {
73938987b35SRichard Tran Mills     PetscInt i;
74038987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
74138987b35SRichard Tran Mills     for (i=0; i<n; i++) {
74238987b35SRichard Tran Mills       z[i] = y[i];
74338987b35SRichard Tran Mills     }
74438987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
74538987b35SRichard Tran Mills     PetscFunctionReturn(0);
74638987b35SRichard Tran Mills   }
747f36dfe3fSRichard Tran Mills 
748df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
749df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
750df555b71SRichard Tran Mills 
7513fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
7523fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
7533fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
754551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
755551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
7563fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
7573fa15762SRichard Tran Mills   }
7583fa15762SRichard Tran Mills 
759df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
760df555b71SRichard Tran Mills   if (zz == yy) {
761df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
762df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
763db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
7649c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
765df555b71SRichard Tran Mills   } else {
766df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
767df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
768db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
7699c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
770969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
771df555b71SRichard Tran Mills       z[i] += y[i];
772df555b71SRichard Tran Mills     }
773df555b71SRichard Tran Mills   }
774df555b71SRichard Tran Mills 
775df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
776df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
777df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
778df555b71SRichard Tran Mills   PetscFunctionReturn(0);
779df555b71SRichard Tran Mills }
780d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
781df555b71SRichard Tran Mills 
782190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
7838a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
784190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
785431879ecSRichard Tran Mills {
786190ae7a4SRichard Tran Mills   Mat_SeqAIJMKL       *a, *b;
787431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
788190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
789431879ecSRichard Tran Mills   PetscErrorCode      ierr;
790431879ecSRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
791431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
792431879ecSRichard Tran Mills   PetscObjectState    state;
793431879ecSRichard Tran Mills 
794431879ecSRichard Tran Mills   PetscFunctionBegin;
795190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
796190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
797190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
798190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
799190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
800190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
801190ae7a4SRichard Tran Mills 
802431879ecSRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
803431879ecSRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
804431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
805431879ecSRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
806431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
807431879ecSRichard Tran Mills   }
808431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
809431879ecSRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
810431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
811431879ecSRichard Tran Mills   }
812431879ecSRichard Tran Mills   csrA = a->csrA;
813431879ecSRichard Tran Mills   csrB = b->csrA;
814431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
815431879ecSRichard Tran Mills 
816190ae7a4SRichard Tran Mills   if (csrA && csrB) {
817190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
818431879ecSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply");
819190ae7a4SRichard Tran Mills   } else {
820190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
821190ae7a4SRichard Tran Mills   }
822190ae7a4SRichard Tran Mills 
823190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
824431879ecSRichard Tran Mills 
825431879ecSRichard Tran Mills   PetscFunctionReturn(0);
826431879ecSRichard Tran Mills }
827431879ecSRichard Tran Mills 
828190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
829e8be1fc7SRichard Tran Mills {
830e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *a, *b, *c;
831e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
832e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
833e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
834e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
835e8be1fc7SRichard Tran Mills   PetscObjectState    state;
836e8be1fc7SRichard Tran Mills 
837e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
838e8be1fc7SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
839e8be1fc7SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
840e8be1fc7SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
841e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
842e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
843e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
844e8be1fc7SRichard Tran Mills   }
845e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
846e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
847e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
848e8be1fc7SRichard Tran Mills   }
849e8be1fc7SRichard Tran Mills   csrA = a->csrA;
850e8be1fc7SRichard Tran Mills   csrB = b->csrA;
851e8be1fc7SRichard Tran Mills   csrC = c->csrA;
852e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
853e8be1fc7SRichard Tran Mills 
854190ae7a4SRichard Tran Mills   if (csrA && csrB) {
855190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
856e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
857190ae7a4SRichard Tran Mills   } else {
858190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
859190ae7a4SRichard Tran Mills   }
860e8be1fc7SRichard Tran Mills 
861e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8624f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
863e8be1fc7SRichard Tran Mills 
864e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
865e8be1fc7SRichard Tran Mills }
866e8be1fc7SRichard Tran Mills 
867190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
8684f53af40SRichard Tran Mills {
869190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
870190ae7a4SRichard Tran Mills 
871190ae7a4SRichard Tran Mills   PetscFunctionBegin;
872190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
873190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
874190ae7a4SRichard Tran Mills }
875190ae7a4SRichard Tran Mills 
876190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
877190ae7a4SRichard Tran Mills {
878190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
879190ae7a4SRichard Tran Mills 
880190ae7a4SRichard Tran Mills   PetscFunctionBegin;
881190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
882190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
883190ae7a4SRichard Tran Mills }
884190ae7a4SRichard Tran Mills 
885190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
886190ae7a4SRichard Tran Mills {
887190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
888190ae7a4SRichard Tran Mills 
889190ae7a4SRichard Tran Mills   PetscFunctionBegin;
890190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
891190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
892190ae7a4SRichard Tran Mills }
893190ae7a4SRichard Tran Mills 
894190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
895190ae7a4SRichard Tran Mills {
896190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
897190ae7a4SRichard Tran Mills 
898190ae7a4SRichard Tran Mills   PetscFunctionBegin;
899190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
900190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
901190ae7a4SRichard Tran Mills }
902190ae7a4SRichard Tran Mills 
903190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
904190ae7a4SRichard Tran Mills {
905190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
906190ae7a4SRichard Tran Mills 
907190ae7a4SRichard Tran Mills   PetscFunctionBegin;
908190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
909190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
910190ae7a4SRichard Tran Mills }
911190ae7a4SRichard Tran Mills 
912190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
913190ae7a4SRichard Tran Mills {
914190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
915190ae7a4SRichard Tran Mills 
916190ae7a4SRichard Tran Mills   PetscFunctionBegin;
917190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
918190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
919190ae7a4SRichard Tran Mills }
920190ae7a4SRichard Tran Mills 
921190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
922190ae7a4SRichard Tran Mills {
923190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
924190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
925190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
926190ae7a4SRichard Tran Mills 
927190ae7a4SRichard Tran Mills   PetscFunctionBegin;
928190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
929190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
930190ae7a4SRichard Tran Mills }
931190ae7a4SRichard Tran Mills 
932190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
933190ae7a4SRichard Tran Mills {
934190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
935190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
936190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
937190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
938190ae7a4SRichard Tran Mills 
939190ae7a4SRichard Tran Mills   PetscFunctionBegin;
940190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
941190ae7a4SRichard Tran Mills 
942190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
943190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
944190ae7a4SRichard Tran Mills }
945190ae7a4SRichard Tran Mills 
946190ae7a4SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat P,Mat C)
947190ae7a4SRichard Tran Mills {
948190ae7a4SRichard Tran Mills   Mat                 Ct;
949190ae7a4SRichard Tran Mills   Vec                 zeros;
9504f53af40SRichard Tran Mills   Mat_SeqAIJMKL       *a, *p, *c;
9514f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
9524f53af40SRichard Tran Mills   PetscBool           set, flag;
9534f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
954b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
9554f53af40SRichard Tran Mills   PetscObjectState    state;
9564f53af40SRichard Tran Mills   PetscErrorCode      ierr;
9574f53af40SRichard Tran Mills 
9584f53af40SRichard Tran Mills   PetscFunctionBegin;
9594f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
960190ae7a4SRichard Tran Mills   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL() called on matrix A not marked as symmetric");
9614f53af40SRichard Tran Mills 
9624f53af40SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
9634f53af40SRichard Tran Mills   p = (Mat_SeqAIJMKL*)P->spptr;
9644f53af40SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
9654f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
9664f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
9674f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
9684f53af40SRichard Tran Mills   }
9694f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
9704f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
9714f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
9724f53af40SRichard Tran Mills   }
9734f53af40SRichard Tran Mills   csrA = a->csrA;
9744f53af40SRichard Tran Mills   csrP = p->csrA;
9754f53af40SRichard Tran Mills   csrC = c->csrA;
976b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
977190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
978b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
9794f53af40SRichard Tran Mills 
980f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
981b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
9824f53af40SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
9834f53af40SRichard Tran Mills 
984190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
985190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
986190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
987190ae7a4SRichard Tran Mills    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
988190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
989190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
990190ae7a4SRichard Tran Mills    * the full matrix. */
9914f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
992190ae7a4SRichard Tran Mills   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
993190ae7a4SRichard Tran Mills   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
994190ae7a4SRichard Tran Mills   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
995190ae7a4SRichard Tran Mills   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
996190ae7a4SRichard Tran Mills   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
997190ae7a4SRichard Tran Mills   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
998190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
999190ae7a4SRichard Tran Mills   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
1000190ae7a4SRichard Tran Mills   C->product->type = MATPRODUCT_PtAP;
1001190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
1002190ae7a4SRichard Tran Mills   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
1003190ae7a4SRichard Tran Mills   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
10044f53af40SRichard Tran Mills 
10054f53af40SRichard Tran Mills   PetscFunctionReturn(0);
10064f53af40SRichard Tran Mills }
1007190ae7a4SRichard Tran Mills 
1008190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
1009190ae7a4SRichard Tran Mills {
1010190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
1011190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
1012190ae7a4SRichard Tran Mills   Mat_SeqAIJMKL       *a,*p;
1013190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
1014190ae7a4SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
1015190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
1016190ae7a4SRichard Tran Mills   PetscObjectState    state;
1017190ae7a4SRichard Tran Mills   PetscErrorCode      ierr;
1018190ae7a4SRichard Tran Mills 
1019190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1020190ae7a4SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
1021190ae7a4SRichard Tran Mills   p = (Mat_SeqAIJMKL*)P->spptr;
1022190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1023190ae7a4SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
1024190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
1025190ae7a4SRichard Tran Mills   }
1026190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1027190ae7a4SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
1028190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
1029190ae7a4SRichard Tran Mills   }
1030190ae7a4SRichard Tran Mills   csrA = a->csrA;
1031190ae7a4SRichard Tran Mills   csrP = p->csrA;
1032190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1033190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1034190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1035190ae7a4SRichard Tran Mills 
1036190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1037190ae7a4SRichard Tran Mills   if (csrP && csrA) {
1038190ae7a4SRichard Tran Mills     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1039190ae7a4SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr");
1040190ae7a4SRichard Tran Mills   } else {
1041190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
1042190ae7a4SRichard Tran Mills   }
1043190ae7a4SRichard Tran Mills 
1044190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1045190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1046190ae7a4SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(). I believe that leaving things
1047190ae7a4SRichard Tran Mills    * in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this is
1048190ae7a4SRichard Tran Mills    * guaranteed. */
1049190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1050190ae7a4SRichard Tran Mills 
1051190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
1052190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1053190ae7a4SRichard Tran Mills }
1054190ae7a4SRichard Tran Mills 
1055190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1056190ae7a4SRichard Tran Mills {
1057190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1058190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
1059190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1060190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1061190ae7a4SRichard Tran Mills }
1062190ae7a4SRichard Tran Mills 
1063190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1064190ae7a4SRichard Tran Mills {
1065190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1066190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1067190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1068190ae7a4SRichard Tran Mills }
1069190ae7a4SRichard Tran Mills 
1070190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1071190ae7a4SRichard Tran Mills {
1072190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1073190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1074190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1075190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1076190ae7a4SRichard Tran Mills }
1077190ae7a4SRichard Tran Mills 
1078190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1079190ae7a4SRichard Tran Mills {
1080190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1081190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1082190ae7a4SRichard Tran Mills   Mat            A = product->A;
1083190ae7a4SRichard Tran Mills   PetscBool      set, flag;
1084190ae7a4SRichard Tran Mills 
1085190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1086190ae7a4SRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
1087190ae7a4SRichard Tran Mills   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1088190ae7a4SRichard Tran Mills    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1089190ae7a4SRichard Tran Mills    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1090190ae7a4SRichard Tran Mills    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1091190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL;
1092190ae7a4SRichard Tran Mills #else
1093190ae7a4SRichard Tran Mills   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1094190ae7a4SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1095190ae7a4SRichard Tran Mills   if (set && flag) {
1096190ae7a4SRichard Tran Mills     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1097190ae7a4SRichard Tran Mills     PetscFunctionReturn(0);
1098190ae7a4SRichard Tran Mills   } else {
1099190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1100190ae7a4SRichard Tran Mills   }
1101190ae7a4SRichard Tran Mills   /* Note that we don't set C->ops->productnumeric here, as this has must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL(),
1102190ae7a4SRichard Tran Mills    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1103190ae7a4SRichard Tran Mills #endif
1104190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1105190ae7a4SRichard Tran Mills }
1106190ae7a4SRichard Tran Mills 
1107190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1108190ae7a4SRichard Tran Mills {
1109190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1110190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1111190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1112190ae7a4SRichard Tran Mills }
1113190ae7a4SRichard Tran Mills 
1114190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1115190ae7a4SRichard Tran Mills {
1116190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1117190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1118190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1119190ae7a4SRichard Tran Mills }
1120190ae7a4SRichard Tran Mills 
1121190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1122190ae7a4SRichard Tran Mills {
1123190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1124190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1125190ae7a4SRichard Tran Mills 
1126190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1127190ae7a4SRichard Tran Mills   switch (product->type) {
1128190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
1129190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1130190ae7a4SRichard Tran Mills     break;
1131190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
1132190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1133190ae7a4SRichard Tran Mills     break;
1134190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
1135190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1136190ae7a4SRichard Tran Mills     break;
1137190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
1138190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1139190ae7a4SRichard Tran Mills     break;
1140190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
1141190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1142190ae7a4SRichard Tran Mills     break;
1143190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
1144190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1145190ae7a4SRichard Tran Mills     break;
1146190ae7a4SRichard Tran Mills   default:
1147190ae7a4SRichard Tran Mills     break;
1148190ae7a4SRichard Tran Mills   }
1149190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1150190ae7a4SRichard Tran Mills }
1151431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1152190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
11534f53af40SRichard Tran Mills 
11544a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1155510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
11564a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
11574a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
11584a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
11594a2a386eSRichard Tran Mills {
11604a2a386eSRichard Tran Mills   PetscErrorCode ierr;
11614a2a386eSRichard Tran Mills   Mat            B = *newmat;
11624a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1163c9d46305SRichard Tran Mills   PetscBool      set;
1164e9c94282SRichard Tran Mills   PetscBool      sametype;
11654a2a386eSRichard Tran Mills 
11664a2a386eSRichard Tran Mills   PetscFunctionBegin;
11674a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
11684a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
11694a2a386eSRichard Tran Mills   }
11704a2a386eSRichard Tran Mills 
1171e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1172e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1173e9c94282SRichard Tran Mills 
11744a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
11754a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
11764a2a386eSRichard Tran Mills 
1177df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1178969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
11794a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
11804a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
11814a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1182c9d46305SRichard Tran Mills 
11834abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1184ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1185d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1186a8327b06SKarl Rupp #else
1187d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1188d995685eSRichard Tran Mills #endif
11895b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
11904abfa3b3SRichard Tran Mills 
11914abfa3b3SRichard Tran Mills   /* Parse command line options. */
1192c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
119348292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
119448292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1195c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1196ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1197d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1198d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1199d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1200d995685eSRichard Tran Mills   }
1201d995685eSRichard Tran Mills #endif
1202c9d46305SRichard Tran Mills 
1203ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1204df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1205969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1206df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1207969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
12088a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1209190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1210190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1211190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1212190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1213190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1214ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
1215190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL;
1216190ae7a4SRichard Tran Mills #   else
1217190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
12184f53af40SRichard Tran Mills #   endif
1219e8be1fc7SRichard Tran Mills # endif
12201950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
12211950a7e7SRichard Tran Mills 
1222213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1223213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1224213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1225213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1226213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
12271950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
12284a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1229969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
12304a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1231969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1232c9d46305SRichard Tran Mills   }
12331950a7e7SRichard Tran Mills #endif
12344a2a386eSRichard Tran Mills 
12354a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
12364a2a386eSRichard Tran Mills 
1237190ae7a4SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1238190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1239190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1240190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1241190ae7a4SRichard Tran Mills #endif
1242190ae7a4SRichard Tran Mills #endif
1243190ae7a4SRichard Tran Mills   }
1244190ae7a4SRichard Tran Mills 
12454a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
12464a2a386eSRichard Tran Mills   *newmat = B;
12474a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12484a2a386eSRichard Tran Mills }
12494a2a386eSRichard Tran Mills 
12504a2a386eSRichard Tran Mills /*@C
12514a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
12524a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
12534a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
125490147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
125590147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1256597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1257597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1258597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
125990147e49SRichard Tran Mills 
1260d083f849SBarry Smith    Collective
12614a2a386eSRichard Tran Mills 
12624a2a386eSRichard Tran Mills    Input Parameters:
12634a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
12644a2a386eSRichard Tran Mills .  m - number of rows
12654a2a386eSRichard Tran Mills .  n - number of columns
12664a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
12674a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
12684a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
12694a2a386eSRichard Tran Mills 
12704a2a386eSRichard Tran Mills    Output Parameter:
12714a2a386eSRichard Tran Mills .  A - the matrix
12724a2a386eSRichard Tran Mills 
127390147e49SRichard Tran Mills    Options Database Keys:
127466b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
127566b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
127690147e49SRichard Tran Mills 
12774a2a386eSRichard Tran Mills    Notes:
12784a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
12794a2a386eSRichard Tran Mills 
12804a2a386eSRichard Tran Mills    Level: intermediate
12814a2a386eSRichard Tran Mills 
12824a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
12834a2a386eSRichard Tran Mills @*/
12844a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
12854a2a386eSRichard Tran Mills {
12864a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12874a2a386eSRichard Tran Mills 
12884a2a386eSRichard Tran Mills   PetscFunctionBegin;
12894a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
12904a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
12914a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
12924a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
12934a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12944a2a386eSRichard Tran Mills }
12954a2a386eSRichard Tran Mills 
12964a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
12974a2a386eSRichard Tran Mills {
12984a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12994a2a386eSRichard Tran Mills 
13004a2a386eSRichard Tran Mills   PetscFunctionBegin;
13014a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
13024a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
13034a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
13044a2a386eSRichard Tran Mills }
1305