xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 551aa5c87513b3dd66216044ca44a3df6d732447)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
175b49642aSRichard Tran Mills   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
184abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19*551aa5c8SRichard Tran Mills   PetscObjectState state;
20b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
21df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
22df555b71SRichard Tran Mills   struct matrix_descr descr;
23b8cbc1fbSRichard Tran Mills #endif
244a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
254a2a386eSRichard Tran Mills 
264a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
274a2a386eSRichard Tran Mills 
284a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
294a2a386eSRichard Tran Mills {
304a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
314a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
324a2a386eSRichard Tran Mills   PetscErrorCode ierr;
334a2a386eSRichard Tran Mills   Mat            B       = *newmat;
34c1d5218aSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
354a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36c1d5218aSRichard Tran Mills #endif
374a2a386eSRichard Tran Mills 
384a2a386eSRichard Tran Mills   PetscFunctionBegin;
394a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
404a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
414a2a386eSRichard Tran Mills   }
424a2a386eSRichard Tran Mills 
434a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4454871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
464a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4754871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
48ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4954871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
50ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
5145fbe478SRichard Tran Mills   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52372ec6bbSRichard Tran Mills   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
5387c2a1d7SRichard Tran Mills   B->ops->scale            = MatScale_SeqAIJ;
5487c2a1d7SRichard Tran Mills   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
5587c2a1d7SRichard Tran Mills   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
5687c2a1d7SRichard Tran Mills   B->ops->axpy             = MatAXPY_SeqAIJ;
574a2a386eSRichard Tran Mills 
58e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
59e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
6245fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
634a940b00SSatish Balay   if(!aijmkl->no_SpMV2) {
6445fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
65372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
6645fbe478SRichard Tran Mills   }
67e9c94282SRichard Tran Mills 
684abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
69e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
70e9c94282SRichard Tran Mills    * the spptr pointer. */
71a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
72a8327b06SKarl Rupp 
734abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
740632b357SRichard Tran Mills     sparse_status_t stat;
754abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
764abfa3b3SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
774abfa3b3SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
784abfa3b3SRichard Tran Mills     }
794abfa3b3SRichard Tran Mills   }
804abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
81e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
824a2a386eSRichard Tran Mills 
834a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
844a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills   *newmat = B;
874a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
884a2a386eSRichard Tran Mills }
894a2a386eSRichard Tran Mills 
904a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
914a2a386eSRichard Tran Mills {
924a2a386eSRichard Tran Mills   PetscErrorCode ierr;
934a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
944a2a386eSRichard Tran Mills 
954a2a386eSRichard Tran Mills   PetscFunctionBegin;
96e9c94282SRichard Tran Mills 
97e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
98e9c94282SRichard Tran Mills    * spptr pointer. */
99e9c94282SRichard Tran Mills   if (aijmkl) {
1004a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
1014abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1024abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
1034abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1044abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
1054abfa3b3SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) {
1064abfa3b3SRichard Tran Mills         PetscFunctionReturn(PETSC_ERR_LIB);
1074abfa3b3SRichard Tran Mills       }
1084abfa3b3SRichard Tran Mills     }
1094abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1104a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
111e9c94282SRichard Tran Mills   }
1124a2a386eSRichard Tran Mills 
1134a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1144a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1154a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1164a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1174a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1184a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1194a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1204a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1214a2a386eSRichard Tran Mills }
1224a2a386eSRichard Tran Mills 
1235b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1245b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1255b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1265b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1275b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1285b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1295b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1306e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1314a2a386eSRichard Tran Mills {
1326e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1336e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1346e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1356e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
13645fbe478SRichard Tran Mills   PetscFunctionBegin;
1376e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1386e369cd5SRichard Tran Mills #else
139a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
140a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
141a8327b06SKarl Rupp   PetscInt         m,n;
142a8327b06SKarl Rupp   MatScalar        *aa;
143a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1446e369cd5SRichard Tran Mills   sparse_status_t  stat;
145*551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
146*551aa5c8SRichard Tran Mills   PetscObjectState state;
1474a2a386eSRichard Tran Mills 
148a8327b06SKarl Rupp   PetscFunctionBegin;
1496e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1506e369cd5SRichard Tran Mills 
1510632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1520632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1530632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1540632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1550632b357SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
1560632b357SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
1570632b357SRichard Tran Mills     }
1580632b357SRichard Tran Mills   }
1598d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1606e369cd5SRichard Tran Mills 
161c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
162df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
163df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
164df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
16558678438SRichard Tran Mills   m = A->rmap->n;
16658678438SRichard Tran Mills   n = A->cmap->n;
167df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
168df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
169df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
17080095d54SIrina Sokolova   if ((a->nz!=0) & !(A->structure_only)) {
1718d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1728d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17358678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
174df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
175df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
176df555b71SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
177df555b71SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
178f68ad4bdSRichard Tran Mills       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
179df555b71SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
180df555b71SRichard Tran Mills     }
1814abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
182c9d46305SRichard Tran Mills   }
183*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1846e369cd5SRichard Tran Mills 
1856e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
186d995685eSRichard Tran Mills #endif
1876e369cd5SRichard Tran Mills }
1886e369cd5SRichard Tran Mills 
18919afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
19019afcda9SRichard Tran Mills  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
1916c87cf42SRichard Tran Mills  * matrix handle.
192aab60f1bSRichard Tran Mills  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
193aab60f1bSRichard Tran Mills  * there is no good alternative. */
19419afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1956c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
19619afcda9SRichard Tran Mills {
19719afcda9SRichard Tran Mills   PetscErrorCode ierr;
19819afcda9SRichard Tran Mills   sparse_status_t stat;
19919afcda9SRichard Tran Mills   sparse_index_base_t indexing;
20019afcda9SRichard Tran Mills   PetscInt nrows, ncols;
20145fbe478SRichard Tran Mills   PetscInt *aj,*ai,*dummy;
20219afcda9SRichard Tran Mills   MatScalar *aa;
20319afcda9SRichard Tran Mills   Mat A;
2046c87cf42SRichard Tran Mills   Mat_SeqAIJ *a;
20519afcda9SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
20619afcda9SRichard Tran Mills 
20745fbe478SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
20845fbe478SRichard Tran Mills   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
20919afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
21019afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
21119afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
21219afcda9SRichard Tran Mills   }
2136c87cf42SRichard Tran Mills 
214aab60f1bSRichard Tran Mills   if (reuse == MAT_REUSE_MATRIX) {
215aab60f1bSRichard Tran Mills     ierr = MatDestroy(mat);CHKERRQ(ierr);
216aab60f1bSRichard Tran Mills   }
21719afcda9SRichard Tran Mills   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
21819afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
21945fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
220aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
221aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
222aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
223aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
22419afcda9SRichard Tran Mills   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
22519afcda9SRichard Tran Mills 
22619afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22719afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22819afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2296c87cf42SRichard Tran Mills 
2306c87cf42SRichard Tran Mills   a = (Mat_SeqAIJ*)A->data;
23119afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
23219afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2336c87cf42SRichard Tran Mills 
23419afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
23519afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23619afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
237f3fd1758SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
238f3fd1758SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
239f3fd1758SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
24019afcda9SRichard Tran Mills   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
24119afcda9SRichard Tran Mills   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
24219afcda9SRichard Tran Mills   stat = mkl_sparse_optimize(aijmkl->csrA);
24319afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
24419afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
24519afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
24619afcda9SRichard Tran Mills   }
24719afcda9SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
24819afcda9SRichard Tran Mills 
24919afcda9SRichard Tran Mills   *mat = A;
25019afcda9SRichard Tran Mills   PetscFunctionReturn(0);
25119afcda9SRichard Tran Mills }
25219afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
25319afcda9SRichard Tran Mills 
2546e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2556e369cd5SRichard Tran Mills {
2566e369cd5SRichard Tran Mills   PetscErrorCode ierr;
2576e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2586e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
2596e369cd5SRichard Tran Mills 
2606e369cd5SRichard Tran Mills   PetscFunctionBegin;
2616e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
2626e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
2636e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
2646e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
2656e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
2665b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2676e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2685b49642aSRichard Tran Mills   }
2696e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
2706e369cd5SRichard Tran Mills }
2716e369cd5SRichard Tran Mills 
2726e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
2736e369cd5SRichard Tran Mills {
2746e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
2756e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
2765b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2776e369cd5SRichard Tran Mills 
2786e369cd5SRichard Tran Mills   PetscFunctionBegin;
2796e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2806e369cd5SRichard Tran Mills 
2816e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
2826e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
2836e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
2846e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
285d96e85feSRichard Tran Mills    * a lot of code duplication. */
2866e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
2876e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
2886e369cd5SRichard Tran Mills 
2895b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
2905b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
2915b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
2925b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2936e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2944a940b00SSatish Balay #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
295886913bfSRichard Tran Mills   } else if (aijmkl->sparse_optimized) {
296886913bfSRichard Tran Mills     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
297886913bfSRichard Tran Mills      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
298886913bfSRichard Tran Mills      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
2997225e97aSRichard Tran Mills      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
300886913bfSRichard Tran Mills     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
301886913bfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
302886913bfSRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
303886913bfSRichard Tran Mills     }
304886913bfSRichard Tran Mills     aijmkl->sparse_optimized = PETSC_FALSE;
3054a940b00SSatish Balay #endif
3065b49642aSRichard Tran Mills   }
307df555b71SRichard Tran Mills 
3084a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3094a2a386eSRichard Tran Mills }
3104a2a386eSRichard Tran Mills 
3114a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3124a2a386eSRichard Tran Mills {
3134a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3144a2a386eSRichard Tran Mills   const PetscScalar *x;
3154a2a386eSRichard Tran Mills   PetscScalar       *y;
3164a2a386eSRichard Tran Mills   const MatScalar   *aa;
3174a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3184a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
319db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
320db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
321db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3224a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
323db63039fSRichard Tran Mills   char              matdescra[6];
324db63039fSRichard Tran Mills 
3254a2a386eSRichard Tran Mills 
3264a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
327ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
328ff03dc53SRichard Tran Mills 
329ff03dc53SRichard Tran Mills   PetscFunctionBegin;
330db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
331db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
332ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
333ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
334ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
335ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
336ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
337ff03dc53SRichard Tran Mills 
338ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
339db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
340ff03dc53SRichard Tran Mills 
341ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
342ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
343ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
344ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
345ff03dc53SRichard Tran Mills }
346ff03dc53SRichard Tran Mills 
347d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
348df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
349df555b71SRichard Tran Mills {
350df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
351df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
352df555b71SRichard Tran Mills   const PetscScalar *x;
353df555b71SRichard Tran Mills   PetscScalar       *y;
354df555b71SRichard Tran Mills   PetscErrorCode    ierr;
355df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
356*551aa5c8SRichard Tran Mills   PetscObjectState  state;
357df555b71SRichard Tran Mills 
358df555b71SRichard Tran Mills   PetscFunctionBegin;
359df555b71SRichard Tran Mills 
36038987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
36138987b35SRichard Tran Mills   if(!a->nz) {
36238987b35SRichard Tran Mills     PetscInt i;
36338987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
36438987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
36538987b35SRichard Tran Mills     for (i=0; i<m; i++) {
36638987b35SRichard Tran Mills       y[i] = 0.0;
36738987b35SRichard Tran Mills     }
36838987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
36938987b35SRichard Tran Mills     PetscFunctionReturn(0);
37038987b35SRichard Tran Mills   }
371f36dfe3fSRichard Tran Mills 
372df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
373df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
374df555b71SRichard Tran Mills 
3753fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3763fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3773fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
378*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
379*551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
3803fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3813fa15762SRichard Tran Mills   }
3823fa15762SRichard Tran Mills 
383df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
384df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
385df555b71SRichard Tran Mills 
386df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
387df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
388df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
389df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
390df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
391df555b71SRichard Tran Mills   }
392df555b71SRichard Tran Mills   PetscFunctionReturn(0);
393df555b71SRichard Tran Mills }
394d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
395df555b71SRichard Tran Mills 
396ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
397ff03dc53SRichard Tran Mills {
398ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
399ff03dc53SRichard Tran Mills   const PetscScalar *x;
400ff03dc53SRichard Tran Mills   PetscScalar       *y;
401ff03dc53SRichard Tran Mills   const MatScalar   *aa;
402ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
403ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
404db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
405db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
406db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
407ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
408db63039fSRichard Tran Mills   char              matdescra[6];
409ff03dc53SRichard Tran Mills 
410ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
411ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4124a2a386eSRichard Tran Mills 
4134a2a386eSRichard Tran Mills   PetscFunctionBegin;
414969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
415969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4164a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4174a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4184a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4194a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4204a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4214a2a386eSRichard Tran Mills 
4224a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
423db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4244a2a386eSRichard Tran Mills 
4254a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4264a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4274a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4284a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4294a2a386eSRichard Tran Mills }
4304a2a386eSRichard Tran Mills 
431d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
432df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
433df555b71SRichard Tran Mills {
434df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
435df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
436df555b71SRichard Tran Mills   const PetscScalar *x;
437df555b71SRichard Tran Mills   PetscScalar       *y;
438df555b71SRichard Tran Mills   PetscErrorCode    ierr;
4390632b357SRichard Tran Mills   sparse_status_t   stat;
440*551aa5c8SRichard Tran Mills   PetscObjectState  state;
441df555b71SRichard Tran Mills 
442df555b71SRichard Tran Mills   PetscFunctionBegin;
443df555b71SRichard Tran Mills 
44438987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
44538987b35SRichard Tran Mills   if(!a->nz) {
44638987b35SRichard Tran Mills     PetscInt i;
44738987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
44838987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
44938987b35SRichard Tran Mills     for (i=0; i<n; i++) {
45038987b35SRichard Tran Mills       y[i] = 0.0;
45138987b35SRichard Tran Mills     }
45238987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
45338987b35SRichard Tran Mills     PetscFunctionReturn(0);
45438987b35SRichard Tran Mills   }
455f36dfe3fSRichard Tran Mills 
456df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
457df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
458df555b71SRichard Tran Mills 
4593fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4603fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4613fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
462*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
463*551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4643fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4653fa15762SRichard Tran Mills   }
4663fa15762SRichard Tran Mills 
467df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
468df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
469df555b71SRichard Tran Mills 
470df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
471df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
472df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
473df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
474df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
475df555b71SRichard Tran Mills   }
476df555b71SRichard Tran Mills   PetscFunctionReturn(0);
477df555b71SRichard Tran Mills }
478d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
479df555b71SRichard Tran Mills 
4804a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4814a2a386eSRichard Tran Mills {
4824a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
4834a2a386eSRichard Tran Mills   const PetscScalar *x;
4844a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
4854a2a386eSRichard Tran Mills   const MatScalar   *aa;
4864a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
4874a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
488db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
4894a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
4904a2a386eSRichard Tran Mills   PetscInt          i;
4914a2a386eSRichard Tran Mills 
492ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
493ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
494a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
495db63039fSRichard Tran Mills   PetscScalar       beta;
496a84739b8SRichard Tran Mills   char              matdescra[6];
497ff03dc53SRichard Tran Mills 
498ff03dc53SRichard Tran Mills   PetscFunctionBegin;
499a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
500a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
501a84739b8SRichard Tran Mills 
502ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
503ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
504ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
505ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
506ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
507ff03dc53SRichard Tran Mills 
508ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
509a84739b8SRichard Tran Mills   if (zz == yy) {
510a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
511db63039fSRichard Tran Mills     beta = 1.0;
512db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
513a84739b8SRichard Tran Mills   } else {
514db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
515db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
516db63039fSRichard Tran Mills     beta = 0.0;
517db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
518ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
519ff03dc53SRichard Tran Mills       z[i] += y[i];
520ff03dc53SRichard Tran Mills     }
521a84739b8SRichard Tran Mills   }
522ff03dc53SRichard Tran Mills 
523ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
524ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
525ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
526ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
527ff03dc53SRichard Tran Mills }
528ff03dc53SRichard Tran Mills 
529d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
530df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
531df555b71SRichard Tran Mills {
532df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
533df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
534df555b71SRichard Tran Mills   const PetscScalar *x;
535df555b71SRichard Tran Mills   PetscScalar       *y,*z;
536df555b71SRichard Tran Mills   PetscErrorCode    ierr;
537df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
538df555b71SRichard Tran Mills   PetscInt          i;
539df555b71SRichard Tran Mills 
540df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
541df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
542*551aa5c8SRichard Tran Mills   PetscObjectState  state;
543df555b71SRichard Tran Mills 
544df555b71SRichard Tran Mills   PetscFunctionBegin;
545df555b71SRichard Tran Mills 
54638987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
54738987b35SRichard Tran Mills   if(!a->nz) {
54838987b35SRichard Tran Mills     PetscInt i;
54938987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
55038987b35SRichard Tran Mills     for (i=0; i<m; i++) {
55138987b35SRichard Tran Mills       z[i] = y[i];
55238987b35SRichard Tran Mills     }
55338987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
55438987b35SRichard Tran Mills     PetscFunctionReturn(0);
55538987b35SRichard Tran Mills   }
556df555b71SRichard Tran Mills 
557df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
558df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
559df555b71SRichard Tran Mills 
5603fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5613fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5623fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
563*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
564*551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5653fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5663fa15762SRichard Tran Mills   }
5673fa15762SRichard Tran Mills 
568df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
569df555b71SRichard Tran Mills   if (zz == yy) {
570df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
571df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
572db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
573df555b71SRichard Tran Mills   } else {
574df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
575df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
576db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
577df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
578df555b71SRichard Tran Mills       z[i] += y[i];
579df555b71SRichard Tran Mills     }
580df555b71SRichard Tran Mills   }
581df555b71SRichard Tran Mills 
582df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
583df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
584df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
585df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
586df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
587df555b71SRichard Tran Mills   }
588df555b71SRichard Tran Mills   PetscFunctionReturn(0);
589df555b71SRichard Tran Mills }
590d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
591df555b71SRichard Tran Mills 
592ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
593ff03dc53SRichard Tran Mills {
594ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
595ff03dc53SRichard Tran Mills   const PetscScalar *x;
596ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
597ff03dc53SRichard Tran Mills   const MatScalar   *aa;
598ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
599ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
600db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
601ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
602ff03dc53SRichard Tran Mills   PetscInt          i;
603ff03dc53SRichard Tran Mills 
604ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
605ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
606a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
607db63039fSRichard Tran Mills   PetscScalar       beta;
608a84739b8SRichard Tran Mills   char              matdescra[6];
6094a2a386eSRichard Tran Mills 
6104a2a386eSRichard Tran Mills   PetscFunctionBegin;
611a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
612a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
613a84739b8SRichard Tran Mills 
6144a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6154a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6164a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6174a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6184a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6194a2a386eSRichard Tran Mills 
6204a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
621a84739b8SRichard Tran Mills   if (zz == yy) {
622a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
623db63039fSRichard Tran Mills     beta = 1.0;
624969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
625a84739b8SRichard Tran Mills   } else {
626db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
627db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
628db63039fSRichard Tran Mills     beta = 0.0;
629db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
630969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6314a2a386eSRichard Tran Mills       z[i] += y[i];
6324a2a386eSRichard Tran Mills     }
633a84739b8SRichard Tran Mills   }
6344a2a386eSRichard Tran Mills 
6354a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
6364a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6374a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6384a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6394a2a386eSRichard Tran Mills }
6404a2a386eSRichard Tran Mills 
641d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
642df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
643df555b71SRichard Tran Mills {
644df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
645df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
646df555b71SRichard Tran Mills   const PetscScalar *x;
647df555b71SRichard Tran Mills   PetscScalar       *y,*z;
648df555b71SRichard Tran Mills   PetscErrorCode    ierr;
649969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
650df555b71SRichard Tran Mills   PetscInt          i;
651*551aa5c8SRichard Tran Mills   PetscObjectState  state;
652df555b71SRichard Tran Mills 
653df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
654df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
655df555b71SRichard Tran Mills 
656df555b71SRichard Tran Mills   PetscFunctionBegin;
657df555b71SRichard Tran Mills 
65838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
65938987b35SRichard Tran Mills   if(!a->nz) {
66038987b35SRichard Tran Mills     PetscInt i;
66138987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
66238987b35SRichard Tran Mills     for (i=0; i<n; i++) {
66338987b35SRichard Tran Mills       z[i] = y[i];
66438987b35SRichard Tran Mills     }
66538987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
66638987b35SRichard Tran Mills     PetscFunctionReturn(0);
66738987b35SRichard Tran Mills   }
668f36dfe3fSRichard Tran Mills 
669df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
670df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
671df555b71SRichard Tran Mills 
6723fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6733fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6743fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
675*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
676*551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6773fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6783fa15762SRichard Tran Mills   }
6793fa15762SRichard Tran Mills 
680df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
681df555b71SRichard Tran Mills   if (zz == yy) {
682df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
683df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
684db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
685df555b71SRichard Tran Mills   } else {
686df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
687df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
688db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
689969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
690df555b71SRichard Tran Mills       z[i] += y[i];
691df555b71SRichard Tran Mills     }
692df555b71SRichard Tran Mills   }
693df555b71SRichard Tran Mills 
694df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
695df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
696df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
697df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
698df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
699df555b71SRichard Tran Mills   }
700df555b71SRichard Tran Mills   PetscFunctionReturn(0);
701df555b71SRichard Tran Mills }
702d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
703df555b71SRichard Tran Mills 
70445fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
705aab60f1bSRichard Tran Mills /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
706aab60f1bSRichard Tran Mills  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
707aab60f1bSRichard Tran Mills  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
708aab60f1bSRichard Tran Mills  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
709aab60f1bSRichard Tran Mills  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
710aab60f1bSRichard Tran Mills  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
71145fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
71245fbe478SRichard Tran Mills {
71345fbe478SRichard Tran Mills   Mat_SeqAIJMKL    *a, *b;
71445fbe478SRichard Tran Mills   sparse_matrix_t  csrA, csrB, csrC;
71545fbe478SRichard Tran Mills   PetscErrorCode   ierr;
71645fbe478SRichard Tran Mills   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
717*551aa5c8SRichard Tran Mills   PetscObjectState state;
71845fbe478SRichard Tran Mills 
71945fbe478SRichard Tran Mills   PetscFunctionBegin;
72045fbe478SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
72145fbe478SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
722*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
723*551aa5c8SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
72445fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
72545fbe478SRichard Tran Mills   }
726*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
727*551aa5c8SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
72845fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
72945fbe478SRichard Tran Mills   }
73045fbe478SRichard Tran Mills   csrA = a->csrA;
73145fbe478SRichard Tran Mills   csrB = b->csrA;
73245fbe478SRichard Tran Mills 
73345fbe478SRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
73445fbe478SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
73545fbe478SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
73645fbe478SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
73745fbe478SRichard Tran Mills   }
73845fbe478SRichard Tran Mills 
7396c87cf42SRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
74045fbe478SRichard Tran Mills 
74145fbe478SRichard Tran Mills   PetscFunctionReturn(0);
74245fbe478SRichard Tran Mills }
74345fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
74445fbe478SRichard Tran Mills 
745372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
746372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
747372ec6bbSRichard Tran Mills {
748372ec6bbSRichard Tran Mills   Mat_SeqAIJMKL    *a, *b;
749372ec6bbSRichard Tran Mills   sparse_matrix_t  csrA, csrB, csrC;
750372ec6bbSRichard Tran Mills   PetscErrorCode   ierr;
751372ec6bbSRichard Tran Mills   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
752*551aa5c8SRichard Tran Mills   PetscObjectState state;
753372ec6bbSRichard Tran Mills 
754372ec6bbSRichard Tran Mills   PetscFunctionBegin;
755372ec6bbSRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
756372ec6bbSRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
757*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
758*551aa5c8SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
759372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
760372ec6bbSRichard Tran Mills   }
761*551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
762*551aa5c8SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
763372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
764372ec6bbSRichard Tran Mills   }
765372ec6bbSRichard Tran Mills   csrA = a->csrA;
766372ec6bbSRichard Tran Mills   csrB = b->csrA;
767372ec6bbSRichard Tran Mills 
768372ec6bbSRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
769372ec6bbSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
770372ec6bbSRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
771372ec6bbSRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
772372ec6bbSRichard Tran Mills   }
773372ec6bbSRichard Tran Mills 
774372ec6bbSRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
775372ec6bbSRichard Tran Mills 
776372ec6bbSRichard Tran Mills   PetscFunctionReturn(0);
777372ec6bbSRichard Tran Mills }
778372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
779372ec6bbSRichard Tran Mills 
78087c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
781db63039fSRichard Tran Mills {
782db63039fSRichard Tran Mills   PetscErrorCode ierr;
783db63039fSRichard Tran Mills 
78487c2a1d7SRichard Tran Mills   PetscFunctionBegin;
785db63039fSRichard Tran Mills   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
786db63039fSRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
787db63039fSRichard Tran Mills   PetscFunctionReturn(0);
788db63039fSRichard Tran Mills }
789df555b71SRichard Tran Mills 
79087c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
79187c2a1d7SRichard Tran Mills {
79287c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
79387c2a1d7SRichard Tran Mills 
79487c2a1d7SRichard Tran Mills   PetscFunctionBegin;
79587c2a1d7SRichard Tran Mills   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
79687c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
79787c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
79887c2a1d7SRichard Tran Mills }
79987c2a1d7SRichard Tran Mills 
80087c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
80187c2a1d7SRichard Tran Mills {
80287c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
80387c2a1d7SRichard Tran Mills 
80487c2a1d7SRichard Tran Mills   PetscFunctionBegin;
80587c2a1d7SRichard Tran Mills   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
80687c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
80787c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
80887c2a1d7SRichard Tran Mills }
80987c2a1d7SRichard Tran Mills 
81087c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
81187c2a1d7SRichard Tran Mills {
81287c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
81387c2a1d7SRichard Tran Mills 
81487c2a1d7SRichard Tran Mills   PetscFunctionBegin;
81587c2a1d7SRichard Tran Mills   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
81687c2a1d7SRichard Tran Mills   if (str == SAME_NONZERO_PATTERN) {
81787c2a1d7SRichard Tran Mills     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
81887c2a1d7SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
81987c2a1d7SRichard Tran Mills   }
82087c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
82187c2a1d7SRichard Tran Mills }
82287c2a1d7SRichard Tran Mills 
8234a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
8244a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
8254a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
8264a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
8274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
8284a2a386eSRichard Tran Mills {
8294a2a386eSRichard Tran Mills   PetscErrorCode ierr;
8304a2a386eSRichard Tran Mills   Mat            B = *newmat;
8314a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
832c9d46305SRichard Tran Mills   PetscBool      set;
833e9c94282SRichard Tran Mills   PetscBool      sametype;
8344a2a386eSRichard Tran Mills 
8354a2a386eSRichard Tran Mills   PetscFunctionBegin;
8364a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
8374a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
8384a2a386eSRichard Tran Mills   }
8394a2a386eSRichard Tran Mills 
840e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
841e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
842e9c94282SRichard Tran Mills 
8434a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
8444a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
8454a2a386eSRichard Tran Mills 
846df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
847969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
8484a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
8494a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
8504a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
851c9d46305SRichard Tran Mills 
8524abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
853d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
854d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
855a8327b06SKarl Rupp #else
856d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
857d995685eSRichard Tran Mills #endif
8585b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
8594abfa3b3SRichard Tran Mills 
8604abfa3b3SRichard Tran Mills   /* Parse command line options. */
861c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
862c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
8635b49642aSRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
864c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
865d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
866d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
867d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
868d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
869d995685eSRichard Tran Mills   }
870d995685eSRichard Tran Mills #endif
871c9d46305SRichard Tran Mills 
872c9d46305SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
873d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
874df555b71SRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
875969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
876df555b71SRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
877969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
87845fbe478SRichard Tran Mills     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
879a557fde5SRichard Tran Mills     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
880d995685eSRichard Tran Mills #endif
881c9d46305SRichard Tran Mills   } else {
8824a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
883969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
8844a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
885969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
886c9d46305SRichard Tran Mills   }
8874a2a386eSRichard Tran Mills 
888db63039fSRichard Tran Mills   B->ops->scale              = MatScale_SeqAIJMKL;
88987c2a1d7SRichard Tran Mills   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
89087c2a1d7SRichard Tran Mills   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
89187c2a1d7SRichard Tran Mills   B->ops->axpy               = MatAXPY_SeqAIJMKL;
892db63039fSRichard Tran Mills 
893db63039fSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
8944a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
895e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
896e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
897e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
89845fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
89945fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
90045fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
901372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
90245fbe478SRichard Tran Mills #endif
90345fbe478SRichard Tran Mills   }
9044a2a386eSRichard Tran Mills 
9054a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
9064a2a386eSRichard Tran Mills   *newmat = B;
9074a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9084a2a386eSRichard Tran Mills }
9094a2a386eSRichard Tran Mills 
9104a2a386eSRichard Tran Mills /*@C
9114a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
9124a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
9134a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
9143af10221SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
91590147e49SRichard Tran Mills    operations are currently supported.
91690147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
91790147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
91890147e49SRichard Tran Mills 
9194a2a386eSRichard Tran Mills    Collective on MPI_Comm
9204a2a386eSRichard Tran Mills 
9214a2a386eSRichard Tran Mills    Input Parameters:
9224a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
9234a2a386eSRichard Tran Mills .  m - number of rows
9244a2a386eSRichard Tran Mills .  n - number of columns
9254a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
9264a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
9274a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
9284a2a386eSRichard Tran Mills 
9294a2a386eSRichard Tran Mills    Output Parameter:
9304a2a386eSRichard Tran Mills .  A - the matrix
9314a2a386eSRichard Tran Mills 
93290147e49SRichard Tran Mills    Options Database Keys:
93366b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
93466b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
93590147e49SRichard Tran Mills 
9364a2a386eSRichard Tran Mills    Notes:
9374a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
9384a2a386eSRichard Tran Mills 
9394a2a386eSRichard Tran Mills    Level: intermediate
9404a2a386eSRichard Tran Mills 
94190147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel
9424a2a386eSRichard Tran Mills 
9434a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
9444a2a386eSRichard Tran Mills @*/
9454a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
9464a2a386eSRichard Tran Mills {
9474a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9484a2a386eSRichard Tran Mills 
9494a2a386eSRichard Tran Mills   PetscFunctionBegin;
9504a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
9514a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
9524a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
9534a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
9544a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9554a2a386eSRichard Tran Mills }
9564a2a386eSRichard Tran Mills 
9574a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
9584a2a386eSRichard Tran Mills {
9594a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9604a2a386eSRichard Tran Mills 
9614a2a386eSRichard Tran Mills   PetscFunctionBegin;
9624a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
9634a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
9644a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9654a2a386eSRichard Tran Mills }
966