xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 431879ec409417d744fb9e7bc2d5f08b9a65cd7f)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35c1d5218aSRichard Tran Mills #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50*431879ecSRichard Tran Mills   B->ops->matmultsymbolic  = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
524f53af40SRichard Tran Mills   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
534a2a386eSRichard Tran Mills 
54e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
554222ddf1SHong Zhang 
56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
574abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
59e9c94282SRichard Tran Mills    * the spptr pointer. */
60a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61a8327b06SKarl Rupp 
624abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
630632b357SRichard Tran Mills     sparse_status_t stat;
644abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
659c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
664abfa3b3SRichard Tran Mills   }
676718818eSStefano Zampini #endif
68e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
694a2a386eSRichard Tran Mills 
704a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
714a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
724a2a386eSRichard Tran Mills 
734a2a386eSRichard Tran Mills   *newmat = B;
744a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
754a2a386eSRichard Tran Mills }
764a2a386eSRichard Tran Mills 
774a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
784a2a386eSRichard Tran Mills {
794a2a386eSRichard Tran Mills   PetscErrorCode ierr;
804a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
814a2a386eSRichard Tran Mills 
824a2a386eSRichard Tran Mills   PetscFunctionBegin;
83e9c94282SRichard Tran Mills 
84e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
85e9c94282SRichard Tran Mills    * spptr pointer. */
86e9c94282SRichard Tran Mills   if (aijmkl) {
874a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
88ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
894abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
904abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
914abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
929c46acdfSRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
934abfa3b3SRichard Tran Mills     }
944abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
954a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
96e9c94282SRichard Tran Mills   }
974a2a386eSRichard Tran Mills 
984a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
994a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1004a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1014a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1024a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1034a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1044a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1054a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1064a2a386eSRichard Tran Mills }
1074a2a386eSRichard Tran Mills 
1085b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1095b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1105b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1115b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1125b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1135b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1145b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1156e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1164a2a386eSRichard Tran Mills {
117ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1186e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1196e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1206e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
12145fbe478SRichard Tran Mills   PetscFunctionBegin;
1226e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1236e369cd5SRichard Tran Mills #else
124a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
125a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
126a8327b06SKarl Rupp   PetscInt         m,n;
127a8327b06SKarl Rupp   MatScalar        *aa;
128a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1296e369cd5SRichard Tran Mills   sparse_status_t  stat;
130551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1314a2a386eSRichard Tran Mills 
132a8327b06SKarl Rupp   PetscFunctionBegin;
133e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
134e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
135e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
136e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
137e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1386e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1394d51fa23SRichard Tran Mills #endif
1406e369cd5SRichard Tran Mills 
1410632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1420632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1430632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1440632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1459c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1460632b357SRichard Tran Mills   }
1478d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1486e369cd5SRichard Tran Mills 
149c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
150df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
151df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
152df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
15358678438SRichard Tran Mills   m = A->rmap->n;
15458678438SRichard Tran Mills   n = A->cmap->n;
155df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
156df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
157df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
15846cdef40SRichard Tran Mills   if ((a->nz!=0) && aa && !(A->structure_only)) {
1598d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1608d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
16158678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
162e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
163df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
164e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
165df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
166e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
1671950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
168df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
169e8be1fc7SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
1701950a7e7SRichard Tran Mills     }
1714abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
172e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
173c9d46305SRichard Tran Mills   }
1746e369cd5SRichard Tran Mills 
1756e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
176d995685eSRichard Tran Mills #endif
1776e369cd5SRichard Tran Mills }
1786e369cd5SRichard Tran Mills 
17919afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
18019afcda9SRichard Tran Mills  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
1816c87cf42SRichard Tran Mills  * matrix handle.
182aab60f1bSRichard Tran Mills  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
183aab60f1bSRichard Tran Mills  * there is no good alternative. */
184ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1856c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
18619afcda9SRichard Tran Mills {
18719afcda9SRichard Tran Mills   PetscErrorCode      ierr;
18819afcda9SRichard Tran Mills   sparse_status_t     stat;
18919afcda9SRichard Tran Mills   sparse_index_base_t indexing;
19019afcda9SRichard Tran Mills   PetscInt            nrows, ncols;
19145fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
19219afcda9SRichard Tran Mills   MatScalar           *aa;
19319afcda9SRichard Tran Mills   Mat                 A;
19419afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
19519afcda9SRichard Tran Mills 
19645fbe478SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
19745fbe478SRichard Tran Mills   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
1989c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
1996c87cf42SRichard Tran Mills 
200aab60f1bSRichard Tran Mills   if (reuse == MAT_REUSE_MATRIX) {
201aab60f1bSRichard Tran Mills     ierr = MatDestroy(mat);CHKERRQ(ierr);
202aab60f1bSRichard Tran Mills   }
20319afcda9SRichard Tran Mills   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
20419afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
20545fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
206aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
207aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
208aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
209aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
21019afcda9SRichard Tran Mills   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
21119afcda9SRichard Tran Mills 
21219afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
21319afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
21419afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2156c87cf42SRichard Tran Mills 
21619afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
21719afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2186c87cf42SRichard Tran Mills 
21919afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
22019afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
22119afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
222f3fd1758SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
223f3fd1758SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
224f3fd1758SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
22519afcda9SRichard Tran Mills   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
22651539a68SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
22719afcda9SRichard Tran Mills   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
22851539a68SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
2291950a7e7SRichard Tran Mills   if (!aijmkl->no_SpMV2) {
23019afcda9SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
23151539a68SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
2321950a7e7SRichard Tran Mills   }
23319afcda9SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
234e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
23519afcda9SRichard Tran Mills 
23619afcda9SRichard Tran Mills   *mat = A;
23719afcda9SRichard Tran Mills   PetscFunctionReturn(0);
23819afcda9SRichard Tran Mills }
23919afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
24019afcda9SRichard Tran Mills 
241e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
242e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
243e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
244ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
245e8be1fc7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
246e8be1fc7SRichard Tran Mills {
247e8be1fc7SRichard Tran Mills   PetscInt            i;
248e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
249e8be1fc7SRichard Tran Mills   PetscInt            nz;
250e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
251e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
252e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
253e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
254e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
255e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
256e8be1fc7SRichard Tran Mills 
257e8be1fc7SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
258e8be1fc7SRichard Tran Mills 
259e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
260e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
261e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
262e8be1fc7SRichard Tran Mills 
263e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
264e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
265e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
266e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
267e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
268e8be1fc7SRichard Tran Mills   }
269e8be1fc7SRichard Tran Mills 
270e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272e8be1fc7SRichard Tran Mills 
273e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
274e995cf24SRichard Tran Mills   /* We mark our matrix as having a valid, optimized MKL handle.
275e995cf24SRichard Tran Mills    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
276e995cf24SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
277e995cf24SRichard Tran Mills 
278e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
279e8be1fc7SRichard Tran Mills }
280e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
281e8be1fc7SRichard Tran Mills 
2826e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2836e369cd5SRichard Tran Mills {
2846e369cd5SRichard Tran Mills   PetscErrorCode ierr;
2856e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
2866e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
2876e369cd5SRichard Tran Mills 
2886e369cd5SRichard Tran Mills   PetscFunctionBegin;
2896e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
2906e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
2916e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
292580bdb30SBarry Smith   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
2936e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
2945b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2956e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2965b49642aSRichard Tran Mills   }
2976e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
2986e369cd5SRichard Tran Mills }
2996e369cd5SRichard Tran Mills 
3006e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3016e369cd5SRichard Tran Mills {
3026e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3036e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3045b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3056e369cd5SRichard Tran Mills 
3066e369cd5SRichard Tran Mills   PetscFunctionBegin;
3076e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3086e369cd5SRichard Tran Mills 
3096e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3106e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3116e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3126e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
313d96e85feSRichard Tran Mills    * a lot of code duplication. */
3146e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3156e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3166e369cd5SRichard Tran Mills 
3175b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3185b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3195b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
3205b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3216e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3225b49642aSRichard Tran Mills   }
323df555b71SRichard Tran Mills 
3244a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3254a2a386eSRichard Tran Mills }
3264a2a386eSRichard Tran Mills 
327e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3284a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3294a2a386eSRichard Tran Mills {
3304a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3314a2a386eSRichard Tran Mills   const PetscScalar *x;
3324a2a386eSRichard Tran Mills   PetscScalar       *y;
3334a2a386eSRichard Tran Mills   const MatScalar   *aa;
3344a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3354a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
336db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
337db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
338db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3394a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
340db63039fSRichard Tran Mills   char              matdescra[6];
341db63039fSRichard Tran Mills 
3424a2a386eSRichard Tran Mills 
3434a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
344ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
345ff03dc53SRichard Tran Mills 
346ff03dc53SRichard Tran Mills   PetscFunctionBegin;
347db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
348db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
349ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
350ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
351ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
352ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
353ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
354ff03dc53SRichard Tran Mills 
355ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
356db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
357ff03dc53SRichard Tran Mills 
358ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
359ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
360ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
361ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
362ff03dc53SRichard Tran Mills }
3631950a7e7SRichard Tran Mills #endif
364ff03dc53SRichard Tran Mills 
365ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
366df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
367df555b71SRichard Tran Mills {
368df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
369df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
370df555b71SRichard Tran Mills   const PetscScalar *x;
371df555b71SRichard Tran Mills   PetscScalar       *y;
372df555b71SRichard Tran Mills   PetscErrorCode    ierr;
373df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
374551aa5c8SRichard Tran Mills   PetscObjectState  state;
375df555b71SRichard Tran Mills 
376df555b71SRichard Tran Mills   PetscFunctionBegin;
377df555b71SRichard Tran Mills 
37838987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
37938987b35SRichard Tran Mills   if(!a->nz) {
38038987b35SRichard Tran Mills     PetscInt i;
38138987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
38238987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
38338987b35SRichard Tran Mills     for (i=0; i<m; i++) {
38438987b35SRichard Tran Mills       y[i] = 0.0;
38538987b35SRichard Tran Mills     }
38638987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
38738987b35SRichard Tran Mills     PetscFunctionReturn(0);
38838987b35SRichard Tran Mills   }
389f36dfe3fSRichard Tran Mills 
390df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
391df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
392df555b71SRichard Tran Mills 
3933fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3943fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3953fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
397551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
3983fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3993fa15762SRichard Tran Mills   }
4003fa15762SRichard Tran Mills 
401df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
402df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
4039c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
404df555b71SRichard Tran Mills 
405df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
406df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
407df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
408df555b71SRichard Tran Mills   PetscFunctionReturn(0);
409df555b71SRichard Tran Mills }
410d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
411df555b71SRichard Tran Mills 
412e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
413ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
414ff03dc53SRichard Tran Mills {
415ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
416ff03dc53SRichard Tran Mills   const PetscScalar *x;
417ff03dc53SRichard Tran Mills   PetscScalar       *y;
418ff03dc53SRichard Tran Mills   const MatScalar   *aa;
419ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
420ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
421db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
422db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
423db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
424ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
425db63039fSRichard Tran Mills   char              matdescra[6];
426ff03dc53SRichard Tran Mills 
427ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
428ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4294a2a386eSRichard Tran Mills 
4304a2a386eSRichard Tran Mills   PetscFunctionBegin;
431969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
432969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4334a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4344a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4354a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4364a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4374a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4384a2a386eSRichard Tran Mills 
4394a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
440db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4414a2a386eSRichard Tran Mills 
4424a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4434a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4444a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4454a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4464a2a386eSRichard Tran Mills }
4471950a7e7SRichard Tran Mills #endif
4484a2a386eSRichard Tran Mills 
449ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
450df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
451df555b71SRichard Tran Mills {
452df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
453df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
454df555b71SRichard Tran Mills   const PetscScalar *x;
455df555b71SRichard Tran Mills   PetscScalar       *y;
456df555b71SRichard Tran Mills   PetscErrorCode    ierr;
4570632b357SRichard Tran Mills   sparse_status_t   stat;
458551aa5c8SRichard Tran Mills   PetscObjectState  state;
459df555b71SRichard Tran Mills 
460df555b71SRichard Tran Mills   PetscFunctionBegin;
461df555b71SRichard Tran Mills 
46238987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
46338987b35SRichard Tran Mills   if(!a->nz) {
46438987b35SRichard Tran Mills     PetscInt i;
46538987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
46638987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
46738987b35SRichard Tran Mills     for (i=0; i<n; i++) {
46838987b35SRichard Tran Mills       y[i] = 0.0;
46938987b35SRichard Tran Mills     }
47038987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
47138987b35SRichard Tran Mills     PetscFunctionReturn(0);
47238987b35SRichard Tran Mills   }
473f36dfe3fSRichard Tran Mills 
474df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
475df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
476df555b71SRichard Tran Mills 
4773fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4783fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4793fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
480551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
481551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4823fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4833fa15762SRichard Tran Mills   }
4843fa15762SRichard Tran Mills 
485df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
486df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
4879c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
488df555b71SRichard Tran Mills 
489df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
490df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
491df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
492df555b71SRichard Tran Mills   PetscFunctionReturn(0);
493df555b71SRichard Tran Mills }
494d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
495df555b71SRichard Tran Mills 
496e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
4974a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4984a2a386eSRichard Tran Mills {
4994a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5004a2a386eSRichard Tran Mills   const PetscScalar *x;
5014a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5024a2a386eSRichard Tran Mills   const MatScalar   *aa;
5034a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5044a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
505db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
5064a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5074a2a386eSRichard Tran Mills   PetscInt          i;
5084a2a386eSRichard Tran Mills 
509ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
510ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
511a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
512db63039fSRichard Tran Mills   PetscScalar       beta;
513a84739b8SRichard Tran Mills   char              matdescra[6];
514ff03dc53SRichard Tran Mills 
515ff03dc53SRichard Tran Mills   PetscFunctionBegin;
516a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
517a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
518a84739b8SRichard Tran Mills 
519ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
520ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
521ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
522ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
523ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
524ff03dc53SRichard Tran Mills 
525ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
526a84739b8SRichard Tran Mills   if (zz == yy) {
527a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
528db63039fSRichard Tran Mills     beta = 1.0;
529db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
530a84739b8SRichard Tran Mills   } else {
531db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
532db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
533db63039fSRichard Tran Mills     beta = 0.0;
534db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
535ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
536ff03dc53SRichard Tran Mills       z[i] += y[i];
537ff03dc53SRichard Tran Mills     }
538a84739b8SRichard Tran Mills   }
539ff03dc53SRichard Tran Mills 
540ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
541ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
542ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
543ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
544ff03dc53SRichard Tran Mills }
5451950a7e7SRichard Tran Mills #endif
546ff03dc53SRichard Tran Mills 
547ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
548df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
549df555b71SRichard Tran Mills {
550df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
551df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
552df555b71SRichard Tran Mills   const PetscScalar *x;
553df555b71SRichard Tran Mills   PetscScalar       *y,*z;
554df555b71SRichard Tran Mills   PetscErrorCode    ierr;
555df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
556df555b71SRichard Tran Mills   PetscInt          i;
557df555b71SRichard Tran Mills 
558df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
559df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
560551aa5c8SRichard Tran Mills   PetscObjectState  state;
561df555b71SRichard Tran Mills 
562df555b71SRichard Tran Mills   PetscFunctionBegin;
563df555b71SRichard Tran Mills 
56438987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
56538987b35SRichard Tran Mills   if(!a->nz) {
56638987b35SRichard Tran Mills     PetscInt i;
56738987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
56838987b35SRichard Tran Mills     for (i=0; i<m; i++) {
56938987b35SRichard Tran Mills       z[i] = y[i];
57038987b35SRichard Tran Mills     }
57138987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
57238987b35SRichard Tran Mills     PetscFunctionReturn(0);
57338987b35SRichard Tran Mills   }
574df555b71SRichard Tran Mills 
575df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
576df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
577df555b71SRichard Tran Mills 
5783fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5793fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5803fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
581551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
582551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5833fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5843fa15762SRichard Tran Mills   }
5853fa15762SRichard Tran Mills 
586df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
587df555b71SRichard Tran Mills   if (zz == yy) {
588df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
589df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
590db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
5919c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
592df555b71SRichard Tran Mills   } else {
593df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
594df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
595db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
5969c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
597df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
598df555b71SRichard Tran Mills       z[i] += y[i];
599df555b71SRichard Tran Mills     }
600df555b71SRichard Tran Mills   }
601df555b71SRichard Tran Mills 
602df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
603df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
604df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
605df555b71SRichard Tran Mills   PetscFunctionReturn(0);
606df555b71SRichard Tran Mills }
607d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
608df555b71SRichard Tran Mills 
609e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
610ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
611ff03dc53SRichard Tran Mills {
612ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
613ff03dc53SRichard Tran Mills   const PetscScalar *x;
614ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
615ff03dc53SRichard Tran Mills   const MatScalar   *aa;
616ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
617ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
618db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
619ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
620ff03dc53SRichard Tran Mills   PetscInt          i;
621ff03dc53SRichard Tran Mills 
622ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
623ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
624a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
625db63039fSRichard Tran Mills   PetscScalar       beta;
626a84739b8SRichard Tran Mills   char              matdescra[6];
6274a2a386eSRichard Tran Mills 
6284a2a386eSRichard Tran Mills   PetscFunctionBegin;
629a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
630a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
631a84739b8SRichard Tran Mills 
6324a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6334a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6344a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6354a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6364a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6374a2a386eSRichard Tran Mills 
6384a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
639a84739b8SRichard Tran Mills   if (zz == yy) {
640a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
641db63039fSRichard Tran Mills     beta = 1.0;
642969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
643a84739b8SRichard Tran Mills   } else {
644db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
645db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
646db63039fSRichard Tran Mills     beta = 0.0;
647db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
648969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6494a2a386eSRichard Tran Mills       z[i] += y[i];
6504a2a386eSRichard Tran Mills     }
651a84739b8SRichard Tran Mills   }
6524a2a386eSRichard Tran Mills 
6534a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
6544a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6554a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6564a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6574a2a386eSRichard Tran Mills }
6581950a7e7SRichard Tran Mills #endif
6594a2a386eSRichard Tran Mills 
660ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
661df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
662df555b71SRichard Tran Mills {
663df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
665df555b71SRichard Tran Mills   const PetscScalar *x;
666df555b71SRichard Tran Mills   PetscScalar       *y,*z;
667df555b71SRichard Tran Mills   PetscErrorCode    ierr;
668969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
669df555b71SRichard Tran Mills   PetscInt          i;
670551aa5c8SRichard Tran Mills   PetscObjectState  state;
671df555b71SRichard Tran Mills 
672df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
673df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
674df555b71SRichard Tran Mills 
675df555b71SRichard Tran Mills   PetscFunctionBegin;
676df555b71SRichard Tran Mills 
67738987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
67838987b35SRichard Tran Mills   if(!a->nz) {
67938987b35SRichard Tran Mills     PetscInt i;
68038987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
68138987b35SRichard Tran Mills     for (i=0; i<n; i++) {
68238987b35SRichard Tran Mills       z[i] = y[i];
68338987b35SRichard Tran Mills     }
68438987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
68538987b35SRichard Tran Mills     PetscFunctionReturn(0);
68638987b35SRichard Tran Mills   }
687f36dfe3fSRichard Tran Mills 
688df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
689df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
690df555b71SRichard Tran Mills 
6913fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6923fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6933fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
694551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
695551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6963fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6973fa15762SRichard Tran Mills   }
6983fa15762SRichard Tran Mills 
699df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
700df555b71SRichard Tran Mills   if (zz == yy) {
701df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
702df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
703db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
7049c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
705df555b71SRichard Tran Mills   } else {
706df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
707df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
708db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
7099c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
710969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
711df555b71SRichard Tran Mills       z[i] += y[i];
712df555b71SRichard Tran Mills     }
713df555b71SRichard Tran Mills   }
714df555b71SRichard Tran Mills 
715df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
716df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
717df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
718df555b71SRichard Tran Mills   PetscFunctionReturn(0);
719df555b71SRichard Tran Mills }
720d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
721df555b71SRichard Tran Mills 
7228a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
723*431879ecSRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,PetscReal fill,Mat C)
724*431879ecSRichard Tran Mills {
725*431879ecSRichard Tran Mills   Mat_SeqAIJMKL       *a, *b, *c;
726*431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
727*431879ecSRichard Tran Mills   PetscErrorCode      ierr;
728*431879ecSRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
729*431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
730*431879ecSRichard Tran Mills   PetscObjectState    state;
731*431879ecSRichard Tran Mills 
732*431879ecSRichard Tran Mills   PetscFunctionBegin;
733*431879ecSRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
734*431879ecSRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
735*431879ecSRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
736*431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
737*431879ecSRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
738*431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
739*431879ecSRichard Tran Mills   }
740*431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
741*431879ecSRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
742*431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
743*431879ecSRichard Tran Mills   }
744*431879ecSRichard Tran Mills   csrA = a->csrA;
745*431879ecSRichard Tran Mills   csrB = b->csrA;
746*431879ecSRichard Tran Mills   csrC = c->csrA;
747*431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
748*431879ecSRichard Tran Mills 
749*431879ecSRichard Tran Mills   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
750*431879ecSRichard Tran Mills                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
751*431879ecSRichard Tran Mills                          SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
752*431879ecSRichard Tran Mills 
753*431879ecSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply");
754*431879ecSRichard Tran Mills 
755*431879ecSRichard Tran Mills   PetscFunctionReturn(0);
756*431879ecSRichard Tran Mills }
757*431879ecSRichard Tran Mills 
758e8be1fc7SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
759e8be1fc7SRichard Tran Mills {
760e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *a, *b, *c;
761e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
762e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
763e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
764e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
765e8be1fc7SRichard Tran Mills   PetscObjectState    state;
766e8be1fc7SRichard Tran Mills 
767e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
768e8be1fc7SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
769e8be1fc7SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
770e8be1fc7SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
771e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
772e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
773e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
774e8be1fc7SRichard Tran Mills   }
775e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
776e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
777e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
778e8be1fc7SRichard Tran Mills   }
779e8be1fc7SRichard Tran Mills   csrA = a->csrA;
780e8be1fc7SRichard Tran Mills   csrB = b->csrA;
781e8be1fc7SRichard Tran Mills   csrC = c->csrA;
782e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
783e8be1fc7SRichard Tran Mills 
784e8be1fc7SRichard Tran Mills   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
785e8be1fc7SRichard Tran Mills                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
786e8be1fc7SRichard Tran Mills                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
787e8be1fc7SRichard Tran Mills 
788e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
789e8be1fc7SRichard Tran Mills 
790e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7914f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
792e8be1fc7SRichard Tran Mills 
793e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
794e8be1fc7SRichard Tran Mills }
795e8be1fc7SRichard Tran Mills 
7964f53af40SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
7974f53af40SRichard Tran Mills {
7984f53af40SRichard Tran Mills   Mat_SeqAIJMKL       *a, *p, *c;
7994f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8004f53af40SRichard Tran Mills   PetscBool           set, flag;
8014f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
802b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8034f53af40SRichard Tran Mills   PetscObjectState    state;
8044f53af40SRichard Tran Mills   PetscErrorCode      ierr;
8054f53af40SRichard Tran Mills 
8064f53af40SRichard Tran Mills   PetscFunctionBegin;
8074f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
8084f53af40SRichard Tran Mills   if (!set || (set && !flag)) {
8094f53af40SRichard Tran Mills     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
8104f53af40SRichard Tran Mills     PetscFunctionReturn(0);
8114f53af40SRichard Tran Mills   }
8124f53af40SRichard Tran Mills 
8134f53af40SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
8144f53af40SRichard Tran Mills   p = (Mat_SeqAIJMKL*)P->spptr;
8154f53af40SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
8164f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
8174f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
8184f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
8194f53af40SRichard Tran Mills   }
8204f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
8214f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
8224f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
8234f53af40SRichard Tran Mills   }
8244f53af40SRichard Tran Mills   csrA = a->csrA;
8254f53af40SRichard Tran Mills   csrP = p->csrA;
8264f53af40SRichard Tran Mills   csrC = c->csrA;
827b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
828b9e1dd46SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
829b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8304f53af40SRichard Tran Mills 
831f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
832b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
8334f53af40SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
8344f53af40SRichard Tran Mills 
8354f53af40SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8364f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
8374f53af40SRichard Tran Mills 
8384f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8394f53af40SRichard Tran Mills }
840*431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
8414f53af40SRichard Tran Mills 
8424a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
843510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
8444a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
8454a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
8464a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
8474a2a386eSRichard Tran Mills {
8484a2a386eSRichard Tran Mills   PetscErrorCode ierr;
8494a2a386eSRichard Tran Mills   Mat            B = *newmat;
8504a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
851c9d46305SRichard Tran Mills   PetscBool      set;
852e9c94282SRichard Tran Mills   PetscBool      sametype;
8534a2a386eSRichard Tran Mills 
8544a2a386eSRichard Tran Mills   PetscFunctionBegin;
8554a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
8564a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
8574a2a386eSRichard Tran Mills   }
8584a2a386eSRichard Tran Mills 
859e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
860e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
861e9c94282SRichard Tran Mills 
8624a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
8634a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
8644a2a386eSRichard Tran Mills 
865df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
866969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
8674a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
8684a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
8694a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
870c9d46305SRichard Tran Mills 
8714abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
872ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
873d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
874a8327b06SKarl Rupp #else
875d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
876d995685eSRichard Tran Mills #endif
8775b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
8784abfa3b3SRichard Tran Mills 
8794abfa3b3SRichard Tran Mills   /* Parse command line options. */
880c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
88148292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
88248292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
883c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
884ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
885d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
886d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
887d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
888d995685eSRichard Tran Mills   }
889d995685eSRichard Tran Mills #endif
890c9d46305SRichard Tran Mills 
891ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
892df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
893969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
894df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
895969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
8968a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
897*431879ecSRichard Tran Mills   B->ops->matmultsymbolic  = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2;
898e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
899ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
9004f53af40SRichard Tran Mills   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
9014f53af40SRichard Tran Mills #   endif
902e8be1fc7SRichard Tran Mills # endif
9031950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
9041950a7e7SRichard Tran Mills 
905213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
906213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
907213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
908213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
909213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
9101950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
9114a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
912969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
9134a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
914969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
915c9d46305SRichard Tran Mills   }
9161950a7e7SRichard Tran Mills #endif
9174a2a386eSRichard Tran Mills 
9184a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
9194a2a386eSRichard Tran Mills 
9204a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
9214a2a386eSRichard Tran Mills   *newmat = B;
9224a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9234a2a386eSRichard Tran Mills }
9244a2a386eSRichard Tran Mills 
9254a2a386eSRichard Tran Mills /*@C
9264a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
9274a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
9284a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
92990147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
93090147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
931597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
932597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
933597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
93490147e49SRichard Tran Mills 
935d083f849SBarry Smith    Collective
9364a2a386eSRichard Tran Mills 
9374a2a386eSRichard Tran Mills    Input Parameters:
9384a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
9394a2a386eSRichard Tran Mills .  m - number of rows
9404a2a386eSRichard Tran Mills .  n - number of columns
9414a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
9424a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
9434a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
9444a2a386eSRichard Tran Mills 
9454a2a386eSRichard Tran Mills    Output Parameter:
9464a2a386eSRichard Tran Mills .  A - the matrix
9474a2a386eSRichard Tran Mills 
94890147e49SRichard Tran Mills    Options Database Keys:
94966b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
95066b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
95190147e49SRichard Tran Mills 
9524a2a386eSRichard Tran Mills    Notes:
9534a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
9544a2a386eSRichard Tran Mills 
9554a2a386eSRichard Tran Mills    Level: intermediate
9564a2a386eSRichard Tran Mills 
9574a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
9584a2a386eSRichard Tran Mills @*/
9594a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
9604a2a386eSRichard Tran Mills {
9614a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9624a2a386eSRichard Tran Mills 
9634a2a386eSRichard Tran Mills   PetscFunctionBegin;
9644a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
9654a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
9664a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
9674a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
9684a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9694a2a386eSRichard Tran Mills }
9704a2a386eSRichard Tran Mills 
9714a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
9724a2a386eSRichard Tran Mills {
9734a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9744a2a386eSRichard Tran Mills 
9754a2a386eSRichard Tran Mills   PetscFunctionBegin;
9764a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
9774a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
9784a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9794a2a386eSRichard Tran Mills }
980