xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 7225e97a43487a62bc92b33f0ee10d2b5383a2bb)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
175b49642aSRichard Tran Mills   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
184abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33a8327b06SKarl Rupp #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35a8327b06SKarl Rupp #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
5045fbe478SRichard Tran Mills   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51372ec6bbSRichard Tran Mills   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
5287c2a1d7SRichard Tran Mills   B->ops->scale            = MatScale_SeqAIJ;
5387c2a1d7SRichard Tran Mills   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
5487c2a1d7SRichard Tran Mills   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
5587c2a1d7SRichard Tran Mills   B->ops->axpy             = MatAXPY_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
6145fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
6245fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
6345fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
6545fbe478SRichard Tran Mills #endif
6645fbe478SRichard Tran Mills   }
67e9c94282SRichard Tran Mills 
684abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
69e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
70e9c94282SRichard Tran Mills    * the spptr pointer. */
714abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
72a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
73a8327b06SKarl Rupp 
744abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
750632b357SRichard Tran Mills     sparse_status_t stat;
764abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
774abfa3b3SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
784abfa3b3SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
794abfa3b3SRichard Tran Mills     }
804abfa3b3SRichard Tran Mills   }
814abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
82e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
834a2a386eSRichard Tran Mills 
844a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
854a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
864a2a386eSRichard Tran Mills 
874a2a386eSRichard Tran Mills   *newmat = B;
884a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
894a2a386eSRichard Tran Mills }
904a2a386eSRichard Tran Mills 
914a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
924a2a386eSRichard Tran Mills {
934a2a386eSRichard Tran Mills   PetscErrorCode ierr;
944a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
954a2a386eSRichard Tran Mills 
964a2a386eSRichard Tran Mills   PetscFunctionBegin;
97e9c94282SRichard Tran Mills 
98e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
99e9c94282SRichard Tran Mills    * spptr pointer. */
100e9c94282SRichard Tran Mills   if (aijmkl) {
1014a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
1024abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1034abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
1044abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1054abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
1064abfa3b3SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) {
1074abfa3b3SRichard Tran Mills         PetscFunctionReturn(PETSC_ERR_LIB);
1084abfa3b3SRichard Tran Mills       }
1094abfa3b3SRichard Tran Mills     }
1104abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1114a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
112e9c94282SRichard Tran Mills   }
1134a2a386eSRichard Tran Mills 
1144a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1154a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1164a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1174a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1184a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1194a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1204a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1214a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1224a2a386eSRichard Tran Mills }
1234a2a386eSRichard Tran Mills 
1245b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1255b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1265b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1275b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1285b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1295b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1305b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1316e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1324a2a386eSRichard Tran Mills {
1336e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1346e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1356e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1366e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
13745fbe478SRichard Tran Mills   PetscFunctionBegin;
1386e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1396e369cd5SRichard Tran Mills #else
140a8327b06SKarl Rupp   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
141a8327b06SKarl Rupp   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
142a8327b06SKarl Rupp   PetscInt        m,n;
143a8327b06SKarl Rupp   MatScalar       *aa;
144a8327b06SKarl Rupp   PetscInt        *aj,*ai;
1456e369cd5SRichard Tran Mills   sparse_status_t stat;
1464a2a386eSRichard Tran Mills 
147a8327b06SKarl Rupp   PetscFunctionBegin;
1486e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1496e369cd5SRichard Tran Mills 
1500632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1510632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1520632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1530632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1540632b357SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
1550632b357SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
1560632b357SRichard Tran Mills     }
1570632b357SRichard Tran Mills   }
1588d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1596e369cd5SRichard Tran Mills 
160c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
161df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
162df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
163df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
16458678438SRichard Tran Mills   m = A->rmap->n;
16558678438SRichard Tran Mills   n = A->cmap->n;
166df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
167df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
168df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
16980095d54SIrina Sokolova   if ((a->nz!=0) & !(A->structure_only)) {
1708d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1718d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17258678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
173df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
174df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175df555b71SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
176df555b71SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
177f68ad4bdSRichard Tran Mills       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
178df555b71SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
179df555b71SRichard Tran Mills     }
1804abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
181c9d46305SRichard Tran Mills   }
1826e369cd5SRichard Tran Mills 
1836e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
184d995685eSRichard Tran Mills #endif
1856e369cd5SRichard Tran Mills }
1866e369cd5SRichard Tran Mills 
18719afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
18819afcda9SRichard Tran Mills  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
1896c87cf42SRichard Tran Mills  * matrix handle.
1906c87cf42SRichard Tran Mills  * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity
1916c87cf42SRichard Tran Mills  * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult()
1926c87cf42SRichard Tran Mills  * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case.
1936c87cf42SRichard Tran Mills  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
1946c87cf42SRichard Tran Mills  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
1956c87cf42SRichard Tran Mills  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
1966c87cf42SRichard Tran Mills  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
19719afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1986c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
19919afcda9SRichard Tran Mills {
20019afcda9SRichard Tran Mills   PetscErrorCode ierr;
20119afcda9SRichard Tran Mills   sparse_status_t stat;
20219afcda9SRichard Tran Mills   sparse_index_base_t indexing;
20319afcda9SRichard Tran Mills   PetscInt nrows, ncols;
20445fbe478SRichard Tran Mills   PetscInt *aj,*ai,*dummy;
20519afcda9SRichard Tran Mills   MatScalar *aa;
20619afcda9SRichard Tran Mills   Mat A;
2076c87cf42SRichard Tran Mills   Mat_SeqAIJ *a;
20819afcda9SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
20919afcda9SRichard Tran Mills 
21045fbe478SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
21145fbe478SRichard Tran Mills   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
21219afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
21319afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
21419afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
21519afcda9SRichard Tran Mills   }
2166c87cf42SRichard Tran Mills 
2176c87cf42SRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
21819afcda9SRichard Tran Mills     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
21919afcda9SRichard Tran Mills     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
22045fbe478SRichard Tran Mills     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
22119afcda9SRichard Tran Mills     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
22219afcda9SRichard Tran Mills 
22319afcda9SRichard Tran Mills     /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22419afcda9SRichard Tran Mills      * Now turn it into a MATSEQAIJMKL. */
22519afcda9SRichard Tran Mills     ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2266c87cf42SRichard Tran Mills   } else {
2276c87cf42SRichard Tran Mills     A = *mat;
2286c87cf42SRichard Tran Mills   }
2296c87cf42SRichard Tran Mills 
2306c87cf42SRichard Tran Mills   a = (Mat_SeqAIJ*)A->data;
23119afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
2326c87cf42SRichard Tran Mills 
2336c87cf42SRichard Tran Mills   if (reuse == MAT_REUSE_MATRIX) {
2346c87cf42SRichard Tran Mills     /* Need to destroy old MKL handle. */
2356c87cf42SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
2366c87cf42SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
2376c87cf42SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
2386c87cf42SRichard Tran Mills     }
2396c87cf42SRichard Tran Mills 
2406c87cf42SRichard Tran Mills     /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */
2416c87cf42SRichard Tran Mills     ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr);
2426c87cf42SRichard Tran Mills   }
24319afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2446c87cf42SRichard Tran Mills 
24519afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
24619afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
24719afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
248f3fd1758SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
249f3fd1758SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
250f3fd1758SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
25119afcda9SRichard Tran Mills   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
25219afcda9SRichard Tran Mills   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
25319afcda9SRichard Tran Mills   stat = mkl_sparse_optimize(aijmkl->csrA);
25419afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
25519afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
25619afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
25719afcda9SRichard Tran Mills   }
25819afcda9SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
25919afcda9SRichard Tran Mills 
26019afcda9SRichard Tran Mills   *mat = A;
26119afcda9SRichard Tran Mills   PetscFunctionReturn(0);
26219afcda9SRichard Tran Mills }
26319afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
26419afcda9SRichard Tran Mills 
2656e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2666e369cd5SRichard Tran Mills {
2676e369cd5SRichard Tran Mills   PetscErrorCode ierr;
2686e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2696e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
2706e369cd5SRichard Tran Mills 
2716e369cd5SRichard Tran Mills   PetscFunctionBegin;
2726e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
2736e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
2746e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
2756e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
2766e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
2775b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2786e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2795b49642aSRichard Tran Mills   }
2806e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
2816e369cd5SRichard Tran Mills }
2826e369cd5SRichard Tran Mills 
2836e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
2846e369cd5SRichard Tran Mills {
2856e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
2866e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
2875b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2886e369cd5SRichard Tran Mills 
2896e369cd5SRichard Tran Mills   PetscFunctionBegin;
2906e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2916e369cd5SRichard Tran Mills 
2926e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
2936e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
2946e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
2956e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
296d96e85feSRichard Tran Mills    * a lot of code duplication. */
2976e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
2986e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
2996e369cd5SRichard Tran Mills 
3005b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3015b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3025b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
3035b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3046e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
305886913bfSRichard Tran Mills   } else if (aijmkl->sparse_optimized) {
306886913bfSRichard Tran Mills     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
307886913bfSRichard Tran Mills      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
308886913bfSRichard Tran Mills      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
309*7225e97aSRichard Tran Mills      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
310886913bfSRichard Tran Mills     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
311886913bfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
312886913bfSRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
313886913bfSRichard Tran Mills     }
314886913bfSRichard Tran Mills     aijmkl->sparse_optimized = PETSC_FALSE;
3155b49642aSRichard Tran Mills   }
316df555b71SRichard Tran Mills 
3174a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3184a2a386eSRichard Tran Mills }
3194a2a386eSRichard Tran Mills 
3204a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3214a2a386eSRichard Tran Mills {
3224a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3234a2a386eSRichard Tran Mills   const PetscScalar *x;
3244a2a386eSRichard Tran Mills   PetscScalar       *y;
3254a2a386eSRichard Tran Mills   const MatScalar   *aa;
3264a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3274a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
328db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
329db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
330db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3314a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
332db63039fSRichard Tran Mills   char              matdescra[6];
333db63039fSRichard Tran Mills 
3344a2a386eSRichard Tran Mills 
3354a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
336ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
337ff03dc53SRichard Tran Mills 
338ff03dc53SRichard Tran Mills   PetscFunctionBegin;
339db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
340db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
341ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
342ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
343ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
344ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
345ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
346ff03dc53SRichard Tran Mills 
347ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
348db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
349ff03dc53SRichard Tran Mills 
350ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
351ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
352ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
353ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
354ff03dc53SRichard Tran Mills }
355ff03dc53SRichard Tran Mills 
356d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
357df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
358df555b71SRichard Tran Mills {
359df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
360df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
361df555b71SRichard Tran Mills   const PetscScalar *x;
362df555b71SRichard Tran Mills   PetscScalar       *y;
363df555b71SRichard Tran Mills   PetscErrorCode    ierr;
364df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
365df555b71SRichard Tran Mills 
366df555b71SRichard Tran Mills   PetscFunctionBegin;
367df555b71SRichard Tran Mills 
36838987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
36938987b35SRichard Tran Mills   if(!a->nz) {
37038987b35SRichard Tran Mills     PetscInt i;
37138987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
37238987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
37338987b35SRichard Tran Mills     for (i=0; i<m; i++) {
37438987b35SRichard Tran Mills       y[i] = 0.0;
37538987b35SRichard Tran Mills     }
37638987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
37738987b35SRichard Tran Mills     PetscFunctionReturn(0);
37838987b35SRichard Tran Mills   }
379f36dfe3fSRichard Tran Mills 
380df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
381df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
382df555b71SRichard Tran Mills 
3833fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3843fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3853fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3863fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
3873fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3883fa15762SRichard Tran Mills   }
3893fa15762SRichard Tran Mills 
390df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
391df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
392df555b71SRichard Tran Mills 
393df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
394df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
395df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
396df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
397df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
398df555b71SRichard Tran Mills   }
399df555b71SRichard Tran Mills   PetscFunctionReturn(0);
400df555b71SRichard Tran Mills }
401d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402df555b71SRichard Tran Mills 
403ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
404ff03dc53SRichard Tran Mills {
405ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
406ff03dc53SRichard Tran Mills   const PetscScalar *x;
407ff03dc53SRichard Tran Mills   PetscScalar       *y;
408ff03dc53SRichard Tran Mills   const MatScalar   *aa;
409ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
410ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
411db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
412db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
413db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
414ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
415db63039fSRichard Tran Mills   char              matdescra[6];
416ff03dc53SRichard Tran Mills 
417ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
418ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4194a2a386eSRichard Tran Mills 
4204a2a386eSRichard Tran Mills   PetscFunctionBegin;
421969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
422969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4234a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4244a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4254a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4264a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4274a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4284a2a386eSRichard Tran Mills 
4294a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
430db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4314a2a386eSRichard Tran Mills 
4324a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4334a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4344a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4354a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4364a2a386eSRichard Tran Mills }
4374a2a386eSRichard Tran Mills 
438d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
439df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
440df555b71SRichard Tran Mills {
441df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
442df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
443df555b71SRichard Tran Mills   const PetscScalar *x;
444df555b71SRichard Tran Mills   PetscScalar       *y;
445df555b71SRichard Tran Mills   PetscErrorCode    ierr;
4460632b357SRichard Tran Mills   sparse_status_t   stat;
447df555b71SRichard Tran Mills 
448df555b71SRichard Tran Mills   PetscFunctionBegin;
449df555b71SRichard Tran Mills 
45038987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
45138987b35SRichard Tran Mills   if(!a->nz) {
45238987b35SRichard Tran Mills     PetscInt i;
45338987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
45438987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
45538987b35SRichard Tran Mills     for (i=0; i<n; i++) {
45638987b35SRichard Tran Mills       y[i] = 0.0;
45738987b35SRichard Tran Mills     }
45838987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
45938987b35SRichard Tran Mills     PetscFunctionReturn(0);
46038987b35SRichard Tran Mills   }
461f36dfe3fSRichard Tran Mills 
462df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
463df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
464df555b71SRichard Tran Mills 
4653fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4663fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4673fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4683fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
4693fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4703fa15762SRichard Tran Mills   }
4713fa15762SRichard Tran Mills 
472df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
474df555b71SRichard Tran Mills 
475df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
476df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
477df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
478df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
479df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
480df555b71SRichard Tran Mills   }
481df555b71SRichard Tran Mills   PetscFunctionReturn(0);
482df555b71SRichard Tran Mills }
483d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
484df555b71SRichard Tran Mills 
4854a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4864a2a386eSRichard Tran Mills {
4874a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
4884a2a386eSRichard Tran Mills   const PetscScalar *x;
4894a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
4904a2a386eSRichard Tran Mills   const MatScalar   *aa;
4914a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
4924a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
493db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
4944a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
4954a2a386eSRichard Tran Mills   PetscInt          i;
4964a2a386eSRichard Tran Mills 
497ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
498ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
499a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
500db63039fSRichard Tran Mills   PetscScalar       beta;
501a84739b8SRichard Tran Mills   char              matdescra[6];
502ff03dc53SRichard Tran Mills 
503ff03dc53SRichard Tran Mills   PetscFunctionBegin;
504a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
505a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
506a84739b8SRichard Tran Mills 
507ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
508ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
509ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
510ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
511ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
512ff03dc53SRichard Tran Mills 
513ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
514a84739b8SRichard Tran Mills   if (zz == yy) {
515a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
516db63039fSRichard Tran Mills     beta = 1.0;
517db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
518a84739b8SRichard Tran Mills   } else {
519db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
520db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
521db63039fSRichard Tran Mills     beta = 0.0;
522db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
523ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
524ff03dc53SRichard Tran Mills       z[i] += y[i];
525ff03dc53SRichard Tran Mills     }
526a84739b8SRichard Tran Mills   }
527ff03dc53SRichard Tran Mills 
528ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
529ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
530ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
531ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
532ff03dc53SRichard Tran Mills }
533ff03dc53SRichard Tran Mills 
534d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
535df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
536df555b71SRichard Tran Mills {
537df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
538df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
539df555b71SRichard Tran Mills   const PetscScalar *x;
540df555b71SRichard Tran Mills   PetscScalar       *y,*z;
541df555b71SRichard Tran Mills   PetscErrorCode    ierr;
542df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
543df555b71SRichard Tran Mills   PetscInt          i;
544df555b71SRichard Tran Mills 
545df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
546df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
547df555b71SRichard Tran Mills 
548df555b71SRichard Tran Mills   PetscFunctionBegin;
549df555b71SRichard Tran Mills 
55038987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
55138987b35SRichard Tran Mills   if(!a->nz) {
55238987b35SRichard Tran Mills     PetscInt i;
55338987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
55438987b35SRichard Tran Mills     for (i=0; i<m; i++) {
55538987b35SRichard Tran Mills       z[i] = y[i];
55638987b35SRichard Tran Mills     }
55738987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
55838987b35SRichard Tran Mills     PetscFunctionReturn(0);
55938987b35SRichard Tran Mills   }
560df555b71SRichard Tran Mills 
561df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
562df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
563df555b71SRichard Tran Mills 
5643fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5653fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5663fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5673fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
5683fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5693fa15762SRichard Tran Mills   }
5703fa15762SRichard Tran Mills 
571df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
572df555b71SRichard Tran Mills   if (zz == yy) {
573df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
574df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
575db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
576df555b71SRichard Tran Mills   } else {
577df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
578df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
579db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
580df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
581df555b71SRichard Tran Mills       z[i] += y[i];
582df555b71SRichard Tran Mills     }
583df555b71SRichard Tran Mills   }
584df555b71SRichard Tran Mills 
585df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
586df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
587df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
588df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
589df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
590df555b71SRichard Tran Mills   }
591df555b71SRichard Tran Mills   PetscFunctionReturn(0);
592df555b71SRichard Tran Mills }
593d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
594df555b71SRichard Tran Mills 
595ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
596ff03dc53SRichard Tran Mills {
597ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
598ff03dc53SRichard Tran Mills   const PetscScalar *x;
599ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
600ff03dc53SRichard Tran Mills   const MatScalar   *aa;
601ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
602ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
603db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
604ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
605ff03dc53SRichard Tran Mills   PetscInt          i;
606ff03dc53SRichard Tran Mills 
607ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
608ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
609a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
610db63039fSRichard Tran Mills   PetscScalar       beta;
611a84739b8SRichard Tran Mills   char              matdescra[6];
6124a2a386eSRichard Tran Mills 
6134a2a386eSRichard Tran Mills   PetscFunctionBegin;
614a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
615a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
616a84739b8SRichard Tran Mills 
6174a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6184a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6194a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6204a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6214a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6224a2a386eSRichard Tran Mills 
6234a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
624a84739b8SRichard Tran Mills   if (zz == yy) {
625a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
626db63039fSRichard Tran Mills     beta = 1.0;
627969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
628a84739b8SRichard Tran Mills   } else {
629db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
630db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
631db63039fSRichard Tran Mills     beta = 0.0;
632db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
633969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6344a2a386eSRichard Tran Mills       z[i] += y[i];
6354a2a386eSRichard Tran Mills     }
636a84739b8SRichard Tran Mills   }
6374a2a386eSRichard Tran Mills 
6384a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
6394a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6404a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6414a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6424a2a386eSRichard Tran Mills }
6434a2a386eSRichard Tran Mills 
644d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
645df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
646df555b71SRichard Tran Mills {
647df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
648df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
649df555b71SRichard Tran Mills   const PetscScalar *x;
650df555b71SRichard Tran Mills   PetscScalar       *y,*z;
651df555b71SRichard Tran Mills   PetscErrorCode    ierr;
652969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
653df555b71SRichard Tran Mills   PetscInt          i;
654df555b71SRichard Tran Mills 
655df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
656df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
657df555b71SRichard Tran Mills 
658df555b71SRichard Tran Mills   PetscFunctionBegin;
659df555b71SRichard Tran Mills 
66038987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
66138987b35SRichard Tran Mills   if(!a->nz) {
66238987b35SRichard Tran Mills     PetscInt i;
66338987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
66438987b35SRichard Tran Mills     for (i=0; i<n; i++) {
66538987b35SRichard Tran Mills       z[i] = y[i];
66638987b35SRichard Tran Mills     }
66738987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
66838987b35SRichard Tran Mills     PetscFunctionReturn(0);
66938987b35SRichard Tran Mills   }
670f36dfe3fSRichard Tran Mills 
671df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
672df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
673df555b71SRichard Tran Mills 
6743fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6753fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6763fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6773fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
6783fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6793fa15762SRichard Tran Mills   }
6803fa15762SRichard Tran Mills 
681df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
682df555b71SRichard Tran Mills   if (zz == yy) {
683df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
684df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
685db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
686df555b71SRichard Tran Mills   } else {
687df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
688df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
689db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
690969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
691df555b71SRichard Tran Mills       z[i] += y[i];
692df555b71SRichard Tran Mills     }
693df555b71SRichard Tran Mills   }
694df555b71SRichard Tran Mills 
695df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
696df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
697df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
698df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
699df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
700df555b71SRichard Tran Mills   }
701df555b71SRichard Tran Mills   PetscFunctionReturn(0);
702df555b71SRichard Tran Mills }
703d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
704df555b71SRichard Tran Mills 
70545fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
70645fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
70745fbe478SRichard Tran Mills {
70845fbe478SRichard Tran Mills   Mat_SeqAIJMKL *a, *b;
70945fbe478SRichard Tran Mills   sparse_matrix_t csrA, csrB, csrC;
71045fbe478SRichard Tran Mills   PetscErrorCode ierr;
71145fbe478SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
71245fbe478SRichard Tran Mills 
71345fbe478SRichard Tran Mills   PetscFunctionBegin;
71445fbe478SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
71545fbe478SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
71645fbe478SRichard Tran Mills   if (!a->sparse_optimized) {
71745fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
71845fbe478SRichard Tran Mills   }
71945fbe478SRichard Tran Mills   if (!b->sparse_optimized) {
72045fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
72145fbe478SRichard Tran Mills   }
72245fbe478SRichard Tran Mills   csrA = a->csrA;
72345fbe478SRichard Tran Mills   csrB = b->csrA;
72445fbe478SRichard Tran Mills 
72545fbe478SRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
72645fbe478SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
72745fbe478SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
72845fbe478SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
72945fbe478SRichard Tran Mills   }
73045fbe478SRichard Tran Mills 
7316c87cf42SRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
73245fbe478SRichard Tran Mills 
73345fbe478SRichard Tran Mills   PetscFunctionReturn(0);
73445fbe478SRichard Tran Mills }
73545fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
73645fbe478SRichard Tran Mills 
737372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
738372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
739372ec6bbSRichard Tran Mills {
740372ec6bbSRichard Tran Mills   Mat_SeqAIJMKL *a, *b;
741372ec6bbSRichard Tran Mills   sparse_matrix_t csrA, csrB, csrC;
742372ec6bbSRichard Tran Mills   PetscErrorCode ierr;
743372ec6bbSRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
744372ec6bbSRichard Tran Mills 
745372ec6bbSRichard Tran Mills   PetscFunctionBegin;
746372ec6bbSRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
747372ec6bbSRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
748372ec6bbSRichard Tran Mills   if (!a->sparse_optimized) {
749372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
750372ec6bbSRichard Tran Mills   }
751372ec6bbSRichard Tran Mills   if (!b->sparse_optimized) {
752372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
753372ec6bbSRichard Tran Mills   }
754372ec6bbSRichard Tran Mills   csrA = a->csrA;
755372ec6bbSRichard Tran Mills   csrB = b->csrA;
756372ec6bbSRichard Tran Mills 
757372ec6bbSRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
758372ec6bbSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
759372ec6bbSRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
760372ec6bbSRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
761372ec6bbSRichard Tran Mills   }
762372ec6bbSRichard Tran Mills 
763372ec6bbSRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
764372ec6bbSRichard Tran Mills 
765372ec6bbSRichard Tran Mills   PetscFunctionReturn(0);
766372ec6bbSRichard Tran Mills }
767372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
768372ec6bbSRichard Tran Mills 
76987c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
770db63039fSRichard Tran Mills {
771db63039fSRichard Tran Mills   PetscErrorCode ierr;
772db63039fSRichard Tran Mills 
77387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
774db63039fSRichard Tran Mills   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
775db63039fSRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
776db63039fSRichard Tran Mills   PetscFunctionReturn(0);
777db63039fSRichard Tran Mills }
778df555b71SRichard Tran Mills 
77987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
78087c2a1d7SRichard Tran Mills {
78187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
78287c2a1d7SRichard Tran Mills 
78387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
78487c2a1d7SRichard Tran Mills   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
78587c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
78687c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
78787c2a1d7SRichard Tran Mills }
78887c2a1d7SRichard Tran Mills 
78987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
79087c2a1d7SRichard Tran Mills {
79187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
79287c2a1d7SRichard Tran Mills 
79387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
79487c2a1d7SRichard Tran Mills   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
79587c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
79687c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
79787c2a1d7SRichard Tran Mills }
79887c2a1d7SRichard Tran Mills 
79987c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
80087c2a1d7SRichard Tran Mills {
80187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
80287c2a1d7SRichard Tran Mills 
80387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
80487c2a1d7SRichard Tran Mills   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
80587c2a1d7SRichard Tran Mills   if (str == SAME_NONZERO_PATTERN) {
80687c2a1d7SRichard Tran Mills     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
80787c2a1d7SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
80887c2a1d7SRichard Tran Mills   }
80987c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
81087c2a1d7SRichard Tran Mills }
81187c2a1d7SRichard Tran Mills 
8124a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
8134a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
8144a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
8154a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
8164a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
8174a2a386eSRichard Tran Mills {
8184a2a386eSRichard Tran Mills   PetscErrorCode ierr;
8194a2a386eSRichard Tran Mills   Mat            B = *newmat;
8204a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
821c9d46305SRichard Tran Mills   PetscBool      set;
822e9c94282SRichard Tran Mills   PetscBool      sametype;
8234a2a386eSRichard Tran Mills 
8244a2a386eSRichard Tran Mills   PetscFunctionBegin;
8254a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
8264a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
8274a2a386eSRichard Tran Mills   }
8284a2a386eSRichard Tran Mills 
829e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
830e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
831e9c94282SRichard Tran Mills 
8324a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
8334a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
8344a2a386eSRichard Tran Mills 
835df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
836969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
8374a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
8384a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
8394a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
840c9d46305SRichard Tran Mills 
8414abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
842d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
843d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
844a8327b06SKarl Rupp #else
845d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
846d995685eSRichard Tran Mills #endif
8475b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
8484abfa3b3SRichard Tran Mills 
8494abfa3b3SRichard Tran Mills   /* Parse command line options. */
850c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
851c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
8525b49642aSRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
853c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
854d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
855d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
856d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
857d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
858d995685eSRichard Tran Mills   }
859d995685eSRichard Tran Mills #endif
860c9d46305SRichard Tran Mills 
861c9d46305SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
862d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
863df555b71SRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
864969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
865df555b71SRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
866969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
86745fbe478SRichard Tran Mills     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
868372ec6bbSRichard Tran Mills     B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
869d995685eSRichard Tran Mills #endif
870c9d46305SRichard Tran Mills   } else {
8714a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
872969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
8734a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
874969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
875c9d46305SRichard Tran Mills   }
8764a2a386eSRichard Tran Mills 
877db63039fSRichard Tran Mills   B->ops->scale              = MatScale_SeqAIJMKL;
87887c2a1d7SRichard Tran Mills   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
87987c2a1d7SRichard Tran Mills   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
88087c2a1d7SRichard Tran Mills   B->ops->axpy               = MatAXPY_SeqAIJMKL;
881db63039fSRichard Tran Mills 
882db63039fSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
8834a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
884e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
885e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
886e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
88745fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
88845fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
88945fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
890372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
89145fbe478SRichard Tran Mills #endif
89245fbe478SRichard Tran Mills   }
8934a2a386eSRichard Tran Mills 
8944a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
8954a2a386eSRichard Tran Mills   *newmat = B;
8964a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
8974a2a386eSRichard Tran Mills }
8984a2a386eSRichard Tran Mills 
8994a2a386eSRichard Tran Mills /*@C
9004a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
9014a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
9024a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
90390147e49SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
90490147e49SRichard Tran Mills    operations are currently supported.
90590147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
90690147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
90790147e49SRichard Tran Mills 
9084a2a386eSRichard Tran Mills    Collective on MPI_Comm
9094a2a386eSRichard Tran Mills 
9104a2a386eSRichard Tran Mills    Input Parameters:
9114a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
9124a2a386eSRichard Tran Mills .  m - number of rows
9134a2a386eSRichard Tran Mills .  n - number of columns
9144a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
9154a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
9164a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
9174a2a386eSRichard Tran Mills 
9184a2a386eSRichard Tran Mills    Output Parameter:
9194a2a386eSRichard Tran Mills .  A - the matrix
9204a2a386eSRichard Tran Mills 
92190147e49SRichard Tran Mills    Options Database Keys:
92290147e49SRichard Tran Mills .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
92390147e49SRichard Tran Mills 
9244a2a386eSRichard Tran Mills    Notes:
9254a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
9264a2a386eSRichard Tran Mills 
9274a2a386eSRichard Tran Mills    Level: intermediate
9284a2a386eSRichard Tran Mills 
92990147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel
9304a2a386eSRichard Tran Mills 
9314a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
9324a2a386eSRichard Tran Mills @*/
9334a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
9344a2a386eSRichard Tran Mills {
9354a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9364a2a386eSRichard Tran Mills 
9374a2a386eSRichard Tran Mills   PetscFunctionBegin;
9384a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
9394a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
9404a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
9414a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
9424a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9434a2a386eSRichard Tran Mills }
9444a2a386eSRichard Tran Mills 
9454a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
9464a2a386eSRichard Tran Mills {
9474a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9484a2a386eSRichard Tran Mills 
9494a2a386eSRichard Tran Mills   PetscFunctionBegin;
9504a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
9514a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
9524a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
9534a2a386eSRichard Tran Mills }
954