xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision db04c2a0c0f07ba9eda1fcdd8ad55f8a122c8773)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState    state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35c1d5218aSRichard Tran Mills #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
554f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
584222ddf1SHong Zhang 
59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60190ae7a4SRichard Tran Mills   if (!aijmkl->no_SpMV2) {
61190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63190ae7a4SRichard Tran Mills #endif
64190ae7a4SRichard Tran Mills   }
65190ae7a4SRichard Tran Mills 
664abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
68e9c94282SRichard Tran Mills    * the spptr pointer. */
69a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70a8327b06SKarl Rupp 
714abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
720632b357SRichard Tran Mills     sparse_status_t stat;
734abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
74*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
754abfa3b3SRichard Tran Mills   }
766718818eSStefano Zampini #endif
77e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
804a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
814a2a386eSRichard Tran Mills 
824a2a386eSRichard Tran Mills   *newmat = B;
834a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
844a2a386eSRichard Tran Mills }
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
874a2a386eSRichard Tran Mills {
884a2a386eSRichard Tran Mills   PetscErrorCode ierr;
894a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
904a2a386eSRichard Tran Mills 
914a2a386eSRichard Tran Mills   PetscFunctionBegin;
92e9c94282SRichard Tran Mills 
93e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
94e9c94282SRichard Tran Mills    * spptr pointer. */
95e9c94282SRichard Tran Mills   if (aijmkl) {
964a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
97ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
984abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
994abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1004abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
101*db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
1024abfa3b3SRichard Tran Mills     }
1034abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1044a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
105e9c94282SRichard Tran Mills   }
1064a2a386eSRichard Tran Mills 
1074a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1084a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1094a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1104a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1114a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1124a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1134a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1144a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1154a2a386eSRichard Tran Mills }
1164a2a386eSRichard Tran Mills 
117190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1185b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1195b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1205b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1215b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1225b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1235b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1246e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1254a2a386eSRichard Tran Mills {
126ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1276e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1286e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1296e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
13045fbe478SRichard Tran Mills   PetscFunctionBegin;
1316e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1326e369cd5SRichard Tran Mills #else
133a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
134a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
135a8327b06SKarl Rupp   PetscInt         m,n;
136a8327b06SKarl Rupp   MatScalar        *aa;
137a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1386e369cd5SRichard Tran Mills   sparse_status_t  stat;
139551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1404a2a386eSRichard Tran Mills 
141a8327b06SKarl Rupp   PetscFunctionBegin;
142e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
143e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
144e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
145e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
146e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1476e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1484d51fa23SRichard Tran Mills #endif
1496e369cd5SRichard Tran Mills 
1500632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1510632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1520632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1530632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
154*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
1550632b357SRichard Tran Mills   }
1568d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1576e369cd5SRichard Tran Mills 
158c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
159df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
160df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
161df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
16258678438SRichard Tran Mills   m = A->rmap->n;
16358678438SRichard Tran Mills   n = A->cmap->n;
164df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1671495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1688d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1698d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17058678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
172df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
173*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
174df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
1761950a7e7SRichard Tran Mills     if (!aijmkl->no_SpMV2) {
177df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
178*db04c2a0SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
1791950a7e7SRichard Tran Mills     }
1804abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
181e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
182190ae7a4SRichard Tran Mills   } else {
183190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
184c9d46305SRichard Tran Mills   }
1856e369cd5SRichard Tran Mills 
1866e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
187d995685eSRichard Tran Mills #endif
1886e369cd5SRichard Tran Mills }
1896e369cd5SRichard Tran Mills 
190ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
192190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
19319afcda9SRichard Tran Mills {
19419afcda9SRichard Tran Mills   PetscErrorCode      ierr;
19519afcda9SRichard Tran Mills   sparse_status_t     stat;
19619afcda9SRichard Tran Mills   sparse_index_base_t indexing;
197190ae7a4SRichard Tran Mills   PetscInt            m,n;
19845fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
19919afcda9SRichard Tran Mills   MatScalar           *aa;
20019afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
20119afcda9SRichard Tran Mills 
202190ae7a4SRichard Tran Mills   if (csrA) {
20345fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204190ae7a4SRichard Tran Mills     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
2059c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206190ae7a4SRichard Tran Mills     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
207190ae7a4SRichard Tran Mills   } else {
208190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
209190ae7a4SRichard Tran Mills     aa = PETSC_NULL;
210aab60f1bSRichard Tran Mills   }
211190ae7a4SRichard Tran Mills 
21219afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
21345fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
217aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218190ae7a4SRichard Tran Mills   if (csrA) {
219190ae7a4SRichard Tran Mills     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
220190ae7a4SRichard Tran Mills   } else {
221190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
222190ae7a4SRichard Tran Mills     ierr = MatSetUp(A);CHKERRQ(ierr);
223190ae7a4SRichard Tran Mills     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224190ae7a4SRichard Tran Mills     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225190ae7a4SRichard Tran Mills   }
22619afcda9SRichard Tran Mills 
22719afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22819afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22919afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2306c87cf42SRichard Tran Mills 
23119afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
23219afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2336c87cf42SRichard Tran Mills 
23419afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
23519afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23619afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
237f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
238f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
239f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
240190ae7a4SRichard Tran Mills   if (csrA) {
24119afcda9SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
242*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
24319afcda9SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
244*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
2451950a7e7SRichard Tran Mills   }
246e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
24719afcda9SRichard Tran Mills   PetscFunctionReturn(0);
24819afcda9SRichard Tran Mills }
24919afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
25019afcda9SRichard Tran Mills 
251190ae7a4SRichard Tran Mills 
252e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
253e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
254e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
255ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
256190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
257e8be1fc7SRichard Tran Mills {
258e8be1fc7SRichard Tran Mills   PetscInt            i;
259e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
260e8be1fc7SRichard Tran Mills   PetscInt            nz;
261e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
262e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
263e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
2641495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
265e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
266e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
267e8be1fc7SRichard Tran Mills 
268190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
269190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
270190ae7a4SRichard Tran Mills 
271e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
272e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
273e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
274e8be1fc7SRichard Tran Mills 
275e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
276e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
277e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
278e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
279e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
280e8be1fc7SRichard Tran Mills   }
281e8be1fc7SRichard Tran Mills 
282e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
283e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284e8be1fc7SRichard Tran Mills 
285e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
286e995cf24SRichard Tran Mills   /* We mark our matrix as having a valid, optimized MKL handle.
287e995cf24SRichard Tran Mills    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
288e995cf24SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
289e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
290e8be1fc7SRichard Tran Mills }
291e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
292e8be1fc7SRichard Tran Mills 
2933849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
2943849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
2953849ddb2SRichard Tran Mills {
2963849ddb2SRichard Tran Mills   PetscInt            i,j,k;
2973849ddb2SRichard Tran Mills   PetscInt            nrows,ncols;
2983849ddb2SRichard Tran Mills   PetscInt            nz;
2993849ddb2SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
3003849ddb2SRichard Tran Mills   PetscScalar         *aa;
3013849ddb2SRichard Tran Mills   PetscErrorCode      ierr;
3021495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3033849ddb2SRichard Tran Mills   sparse_status_t     stat;
3043849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
3053849ddb2SRichard Tran Mills 
3063849ddb2SRichard Tran Mills   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
3073849ddb2SRichard Tran Mills 
3083849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
3093849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
3103849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
3113849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
3123849ddb2SRichard Tran Mills   }
3133849ddb2SRichard Tran Mills 
3143849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
3153849ddb2SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
3163849ddb2SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
3173849ddb2SRichard Tran Mills 
3183849ddb2SRichard Tran Mills   k = 0;
3193849ddb2SRichard Tran Mills   for (i=0; i<nrows; i++) {
3203849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
3213849ddb2SRichard Tran Mills     nz = ai[i+1] - ai[i];
3223849ddb2SRichard Tran Mills     for (j=0; j<nz; j++) {
3233849ddb2SRichard Tran Mills       if (aa) {
3243849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
3253849ddb2SRichard Tran Mills       } else {
3263849ddb2SRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
3273849ddb2SRichard Tran Mills       }
3283849ddb2SRichard Tran Mills       k++;
3293849ddb2SRichard Tran Mills     }
3303849ddb2SRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
3313849ddb2SRichard Tran Mills   }
3323849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
3333849ddb2SRichard Tran Mills }
3343849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3353849ddb2SRichard Tran Mills 
3366e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
3376e369cd5SRichard Tran Mills {
3386e369cd5SRichard Tran Mills   PetscErrorCode ierr;
3391495fedeSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3406e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3416e369cd5SRichard Tran Mills 
3426e369cd5SRichard Tran Mills   PetscFunctionBegin;
3436e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
3446e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
345580bdb30SBarry Smith   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
3466e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3475b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3486e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3495b49642aSRichard Tran Mills   }
3506e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3516e369cd5SRichard Tran Mills }
3526e369cd5SRichard Tran Mills 
3536e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3546e369cd5SRichard Tran Mills {
3556e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3566e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3575b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3586e369cd5SRichard Tran Mills 
3596e369cd5SRichard Tran Mills   PetscFunctionBegin;
3606e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3616e369cd5SRichard Tran Mills 
3626e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3636e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3646e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3656e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
366d96e85feSRichard Tran Mills    * a lot of code duplication. */
3676e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3686e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3696e369cd5SRichard Tran Mills 
3705b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3715b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3725b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
3735b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3746e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3755b49642aSRichard Tran Mills   }
376df555b71SRichard Tran Mills 
3774a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3784a2a386eSRichard Tran Mills }
3794a2a386eSRichard Tran Mills 
380e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
3814a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3824a2a386eSRichard Tran Mills {
3834a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3844a2a386eSRichard Tran Mills   const PetscScalar *x;
3854a2a386eSRichard Tran Mills   PetscScalar       *y;
3864a2a386eSRichard Tran Mills   const MatScalar   *aa;
3874a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3884a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
389db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
390db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
391db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3924a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
393db63039fSRichard Tran Mills   char              matdescra[6];
394db63039fSRichard Tran Mills 
3954a2a386eSRichard Tran Mills 
3964a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
397ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
398ff03dc53SRichard Tran Mills 
399ff03dc53SRichard Tran Mills   PetscFunctionBegin;
400db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
401db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
402ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
403ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
404ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
405ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
406ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
407ff03dc53SRichard Tran Mills 
408ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
409db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
410ff03dc53SRichard Tran Mills 
411ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
412ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
415ff03dc53SRichard Tran Mills }
4161950a7e7SRichard Tran Mills #endif
417ff03dc53SRichard Tran Mills 
418ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
419df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
420df555b71SRichard Tran Mills {
421df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
422df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
423df555b71SRichard Tran Mills   const PetscScalar *x;
424df555b71SRichard Tran Mills   PetscScalar       *y;
425df555b71SRichard Tran Mills   PetscErrorCode    ierr;
426df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
427551aa5c8SRichard Tran Mills   PetscObjectState  state;
428df555b71SRichard Tran Mills 
429df555b71SRichard Tran Mills   PetscFunctionBegin;
430df555b71SRichard Tran Mills 
43138987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
43238987b35SRichard Tran Mills   if(!a->nz) {
43338987b35SRichard Tran Mills     PetscInt i;
43438987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
43538987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
43638987b35SRichard Tran Mills     for (i=0; i<m; i++) {
43738987b35SRichard Tran Mills       y[i] = 0.0;
43838987b35SRichard Tran Mills     }
43938987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
44038987b35SRichard Tran Mills     PetscFunctionReturn(0);
44138987b35SRichard Tran Mills   }
442f36dfe3fSRichard Tran Mills 
443df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
444df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
445df555b71SRichard Tran Mills 
4463fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4473fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4483fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
449551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
450551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4513fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4523fa15762SRichard Tran Mills   }
4533fa15762SRichard Tran Mills 
454df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
455df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
456*db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
457df555b71SRichard Tran Mills 
458df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
459df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
460df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
461df555b71SRichard Tran Mills   PetscFunctionReturn(0);
462df555b71SRichard Tran Mills }
463d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
464df555b71SRichard Tran Mills 
465e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
466ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
467ff03dc53SRichard Tran Mills {
468ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
469ff03dc53SRichard Tran Mills   const PetscScalar *x;
470ff03dc53SRichard Tran Mills   PetscScalar       *y;
471ff03dc53SRichard Tran Mills   const MatScalar   *aa;
472ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
473ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
474db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
475db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
476db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
477ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
478db63039fSRichard Tran Mills   char              matdescra[6];
479ff03dc53SRichard Tran Mills 
480ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
481ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4824a2a386eSRichard Tran Mills 
4834a2a386eSRichard Tran Mills   PetscFunctionBegin;
484969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
485969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4864a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4874a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4884a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4894a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4904a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4914a2a386eSRichard Tran Mills 
4924a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
493db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4944a2a386eSRichard Tran Mills 
4954a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4964a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4974a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4984a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4994a2a386eSRichard Tran Mills }
5001950a7e7SRichard Tran Mills #endif
5014a2a386eSRichard Tran Mills 
502ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
503df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
504df555b71SRichard Tran Mills {
505df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
506df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
507df555b71SRichard Tran Mills   const PetscScalar *x;
508df555b71SRichard Tran Mills   PetscScalar       *y;
509df555b71SRichard Tran Mills   PetscErrorCode    ierr;
5100632b357SRichard Tran Mills   sparse_status_t   stat;
511551aa5c8SRichard Tran Mills   PetscObjectState  state;
512df555b71SRichard Tran Mills 
513df555b71SRichard Tran Mills   PetscFunctionBegin;
514df555b71SRichard Tran Mills 
51538987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
51638987b35SRichard Tran Mills   if(!a->nz) {
51738987b35SRichard Tran Mills     PetscInt i;
51838987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
51938987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
52038987b35SRichard Tran Mills     for (i=0; i<n; i++) {
52138987b35SRichard Tran Mills       y[i] = 0.0;
52238987b35SRichard Tran Mills     }
52338987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
52438987b35SRichard Tran Mills     PetscFunctionReturn(0);
52538987b35SRichard Tran Mills   }
526f36dfe3fSRichard Tran Mills 
527df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
528df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
529df555b71SRichard Tran Mills 
5303fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5313fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5323fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
533551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
534551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5353fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5363fa15762SRichard Tran Mills   }
5373fa15762SRichard Tran Mills 
538df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
539df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
540*db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
541df555b71SRichard Tran Mills 
542df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
543df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
544df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
545df555b71SRichard Tran Mills   PetscFunctionReturn(0);
546df555b71SRichard Tran Mills }
547d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
548df555b71SRichard Tran Mills 
549e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
5504a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
5514a2a386eSRichard Tran Mills {
5524a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5534a2a386eSRichard Tran Mills   const PetscScalar *x;
5544a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5554a2a386eSRichard Tran Mills   const MatScalar   *aa;
5564a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5574a2a386eSRichard Tran Mills   PetscInt          m = A->rmap->n;
558db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
5594a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5604a2a386eSRichard Tran Mills   PetscInt          i;
5614a2a386eSRichard Tran Mills 
562ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
563ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
564a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
565db63039fSRichard Tran Mills   PetscScalar       beta;
566a84739b8SRichard Tran Mills   char              matdescra[6];
567ff03dc53SRichard Tran Mills 
568ff03dc53SRichard Tran Mills   PetscFunctionBegin;
569a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
570a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
571a84739b8SRichard Tran Mills 
572ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
573ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
574ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
575ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
576ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
577ff03dc53SRichard Tran Mills 
578ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
579a84739b8SRichard Tran Mills   if (zz == yy) {
580a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
581db63039fSRichard Tran Mills     beta = 1.0;
582db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
583a84739b8SRichard Tran Mills   } else {
584db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
585db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
586db63039fSRichard Tran Mills     beta = 0.0;
587db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
588ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
589ff03dc53SRichard Tran Mills       z[i] += y[i];
590ff03dc53SRichard Tran Mills     }
591a84739b8SRichard Tran Mills   }
592ff03dc53SRichard Tran Mills 
593ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
594ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
595ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
596ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
597ff03dc53SRichard Tran Mills }
5981950a7e7SRichard Tran Mills #endif
599ff03dc53SRichard Tran Mills 
600ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
601df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
602df555b71SRichard Tran Mills {
603df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
604df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
605df555b71SRichard Tran Mills   const PetscScalar *x;
606df555b71SRichard Tran Mills   PetscScalar       *y,*z;
607df555b71SRichard Tran Mills   PetscErrorCode    ierr;
608df555b71SRichard Tran Mills   PetscInt          m = A->rmap->n;
609df555b71SRichard Tran Mills   PetscInt          i;
610df555b71SRichard Tran Mills 
611df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
612df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
613551aa5c8SRichard Tran Mills   PetscObjectState  state;
614df555b71SRichard Tran Mills 
615df555b71SRichard Tran Mills   PetscFunctionBegin;
616df555b71SRichard Tran Mills 
61738987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
61838987b35SRichard Tran Mills   if(!a->nz) {
61938987b35SRichard Tran Mills     PetscInt i;
62038987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62138987b35SRichard Tran Mills     for (i=0; i<m; i++) {
62238987b35SRichard Tran Mills       z[i] = y[i];
62338987b35SRichard Tran Mills     }
62438987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62538987b35SRichard Tran Mills     PetscFunctionReturn(0);
62638987b35SRichard Tran Mills   }
627df555b71SRichard Tran Mills 
628df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
629df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
630df555b71SRichard Tran Mills 
6313fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6323fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6333fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
634551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
635551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
6363fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6373fa15762SRichard Tran Mills   }
6383fa15762SRichard Tran Mills 
639df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
640df555b71SRichard Tran Mills   if (zz == yy) {
641df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
642df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
643db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
644*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
645df555b71SRichard Tran Mills   } else {
646df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
647df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
648db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
649*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
650df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
651df555b71SRichard Tran Mills       z[i] += y[i];
652df555b71SRichard Tran Mills     }
653df555b71SRichard Tran Mills   }
654df555b71SRichard Tran Mills 
655df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
656df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
657df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
658df555b71SRichard Tran Mills   PetscFunctionReturn(0);
659df555b71SRichard Tran Mills }
660d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
661df555b71SRichard Tran Mills 
662e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
663ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
664ff03dc53SRichard Tran Mills {
665ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
666ff03dc53SRichard Tran Mills   const PetscScalar *x;
667ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
668ff03dc53SRichard Tran Mills   const MatScalar   *aa;
669ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
670ff03dc53SRichard Tran Mills   PetscInt          m = A->rmap->n;
671db63039fSRichard Tran Mills   PetscInt          n = A->cmap->n;
672ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
673ff03dc53SRichard Tran Mills   PetscInt          i;
674ff03dc53SRichard Tran Mills 
675ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
676ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
677a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
678db63039fSRichard Tran Mills   PetscScalar       beta;
679a84739b8SRichard Tran Mills   char              matdescra[6];
6804a2a386eSRichard Tran Mills 
6814a2a386eSRichard Tran Mills   PetscFunctionBegin;
682a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
683a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
684a84739b8SRichard Tran Mills 
6854a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6864a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6874a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6884a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6894a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6904a2a386eSRichard Tran Mills 
6914a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
692a84739b8SRichard Tran Mills   if (zz == yy) {
693a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
694db63039fSRichard Tran Mills     beta = 1.0;
695969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
696a84739b8SRichard Tran Mills   } else {
697db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
698db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
699db63039fSRichard Tran Mills     beta = 0.0;
700db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
701969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
7024a2a386eSRichard Tran Mills       z[i] += y[i];
7034a2a386eSRichard Tran Mills     }
704a84739b8SRichard Tran Mills   }
7054a2a386eSRichard Tran Mills 
7064a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
7074a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7084a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
7094a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7104a2a386eSRichard Tran Mills }
7111950a7e7SRichard Tran Mills #endif
7124a2a386eSRichard Tran Mills 
713ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
714df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
715df555b71SRichard Tran Mills {
716df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
717df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
718df555b71SRichard Tran Mills   const PetscScalar *x;
719df555b71SRichard Tran Mills   PetscScalar       *y,*z;
720df555b71SRichard Tran Mills   PetscErrorCode    ierr;
721969800c5SRichard Tran Mills   PetscInt          n = A->cmap->n;
722df555b71SRichard Tran Mills   PetscInt          i;
723551aa5c8SRichard Tran Mills   PetscObjectState  state;
724df555b71SRichard Tran Mills 
725df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
726df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
727df555b71SRichard Tran Mills 
728df555b71SRichard Tran Mills   PetscFunctionBegin;
729df555b71SRichard Tran Mills 
73038987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
73138987b35SRichard Tran Mills   if(!a->nz) {
73238987b35SRichard Tran Mills     PetscInt i;
73338987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
73438987b35SRichard Tran Mills     for (i=0; i<n; i++) {
73538987b35SRichard Tran Mills       z[i] = y[i];
73638987b35SRichard Tran Mills     }
73738987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
73838987b35SRichard Tran Mills     PetscFunctionReturn(0);
73938987b35SRichard Tran Mills   }
740f36dfe3fSRichard Tran Mills 
741df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
742df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
743df555b71SRichard Tran Mills 
7443fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
7453fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
7463fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
747551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
748551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
7493fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
7503fa15762SRichard Tran Mills   }
7513fa15762SRichard Tran Mills 
752df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
753df555b71SRichard Tran Mills   if (zz == yy) {
754df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
755df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
756db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
757*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
758df555b71SRichard Tran Mills   } else {
759df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
760df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
761db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
762*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
763969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
764df555b71SRichard Tran Mills       z[i] += y[i];
765df555b71SRichard Tran Mills     }
766df555b71SRichard Tran Mills   }
767df555b71SRichard Tran Mills 
768df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
769df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
770df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
771df555b71SRichard Tran Mills   PetscFunctionReturn(0);
772df555b71SRichard Tran Mills }
773d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
774df555b71SRichard Tran Mills 
775190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
7768a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
777190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
778431879ecSRichard Tran Mills {
7791495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
780431879ecSRichard Tran Mills   sparse_matrix_t     csrA,csrB,csrC;
781190ae7a4SRichard Tran Mills   PetscInt            nrows,ncols;
782431879ecSRichard Tran Mills   PetscErrorCode      ierr;
783431879ecSRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
784431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
785431879ecSRichard Tran Mills   PetscObjectState    state;
786431879ecSRichard Tran Mills 
787431879ecSRichard Tran Mills   PetscFunctionBegin;
788190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
789190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
790190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
791190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
792190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
793190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
794190ae7a4SRichard Tran Mills 
795431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
796431879ecSRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
797431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
798431879ecSRichard Tran Mills   }
799431879ecSRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
800431879ecSRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
801431879ecSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
802431879ecSRichard Tran Mills   }
803431879ecSRichard Tran Mills   csrA = a->csrA;
804431879ecSRichard Tran Mills   csrB = b->csrA;
805431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
806431879ecSRichard Tran Mills 
807190ae7a4SRichard Tran Mills   if (csrA && csrB) {
808190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
809*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
810190ae7a4SRichard Tran Mills   } else {
811190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
812190ae7a4SRichard Tran Mills   }
813190ae7a4SRichard Tran Mills 
814190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
815431879ecSRichard Tran Mills 
816431879ecSRichard Tran Mills   PetscFunctionReturn(0);
817431879ecSRichard Tran Mills }
818431879ecSRichard Tran Mills 
819190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
820e8be1fc7SRichard Tran Mills {
8211495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
822e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
823e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
824e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
825e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
826e8be1fc7SRichard Tran Mills   PetscObjectState    state;
827e8be1fc7SRichard Tran Mills 
828e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
829e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
830e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
831e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
832e8be1fc7SRichard Tran Mills   }
833e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
834e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
835e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
836e8be1fc7SRichard Tran Mills   }
837e8be1fc7SRichard Tran Mills   csrA = a->csrA;
838e8be1fc7SRichard Tran Mills   csrB = b->csrA;
839e8be1fc7SRichard Tran Mills   csrC = c->csrA;
840e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
841e8be1fc7SRichard Tran Mills 
842190ae7a4SRichard Tran Mills   if (csrA && csrB) {
843190ae7a4SRichard Tran Mills     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
844*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
845190ae7a4SRichard Tran Mills   } else {
846190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
847190ae7a4SRichard Tran Mills   }
848e8be1fc7SRichard Tran Mills 
849e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8504f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
851e8be1fc7SRichard Tran Mills 
852e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
853e8be1fc7SRichard Tran Mills }
854e8be1fc7SRichard Tran Mills 
855190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
8564f53af40SRichard Tran Mills {
857190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
858190ae7a4SRichard Tran Mills 
859190ae7a4SRichard Tran Mills   PetscFunctionBegin;
860190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
861190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
862190ae7a4SRichard Tran Mills }
863190ae7a4SRichard Tran Mills 
864190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
865190ae7a4SRichard Tran Mills {
866190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
867190ae7a4SRichard Tran Mills 
868190ae7a4SRichard Tran Mills   PetscFunctionBegin;
869190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
870190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
871190ae7a4SRichard Tran Mills }
872190ae7a4SRichard Tran Mills 
873190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
874190ae7a4SRichard Tran Mills {
875190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
876190ae7a4SRichard Tran Mills 
877190ae7a4SRichard Tran Mills   PetscFunctionBegin;
878190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
879190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
880190ae7a4SRichard Tran Mills }
881190ae7a4SRichard Tran Mills 
882190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
883190ae7a4SRichard Tran Mills {
884190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
885190ae7a4SRichard Tran Mills 
886190ae7a4SRichard Tran Mills   PetscFunctionBegin;
887190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
888190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
889190ae7a4SRichard Tran Mills }
890190ae7a4SRichard Tran Mills 
891190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
892190ae7a4SRichard Tran Mills {
893190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
894190ae7a4SRichard Tran Mills 
895190ae7a4SRichard Tran Mills   PetscFunctionBegin;
896190ae7a4SRichard Tran Mills   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
897190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
898190ae7a4SRichard Tran Mills }
899190ae7a4SRichard Tran Mills 
900190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
901190ae7a4SRichard Tran Mills {
902190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
903190ae7a4SRichard Tran Mills 
904190ae7a4SRichard Tran Mills   PetscFunctionBegin;
905190ae7a4SRichard Tran Mills   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
906190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
907190ae7a4SRichard Tran Mills }
908190ae7a4SRichard Tran Mills 
909190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
910190ae7a4SRichard Tran Mills {
911190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
912190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
913190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
914190ae7a4SRichard Tran Mills 
915190ae7a4SRichard Tran Mills   PetscFunctionBegin;
916190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
917190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
918190ae7a4SRichard Tran Mills }
919190ae7a4SRichard Tran Mills 
920190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
921190ae7a4SRichard Tran Mills {
922190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
923190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
924190ae7a4SRichard Tran Mills   Mat            A = product->A,B = product->B;
925190ae7a4SRichard Tran Mills   PetscReal      fill = product->fill;
926190ae7a4SRichard Tran Mills 
927190ae7a4SRichard Tran Mills   PetscFunctionBegin;
928190ae7a4SRichard Tran Mills   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
929190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
930190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
931190ae7a4SRichard Tran Mills }
932190ae7a4SRichard Tran Mills 
93349ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
934190ae7a4SRichard Tran Mills {
935190ae7a4SRichard Tran Mills   Mat                 Ct;
936190ae7a4SRichard Tran Mills   Vec                 zeros;
9371495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
9384f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
9394f53af40SRichard Tran Mills   PetscBool           set, flag;
9404f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
941b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
9424f53af40SRichard Tran Mills   PetscObjectState    state;
9434f53af40SRichard Tran Mills   PetscErrorCode      ierr;
9444f53af40SRichard Tran Mills 
9454f53af40SRichard Tran Mills   PetscFunctionBegin;
9464f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
94749ba5396SRichard Tran Mills   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
9484f53af40SRichard Tran Mills 
9494f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
9504f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
9514f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
9524f53af40SRichard Tran Mills   }
9534f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
9544f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
9554f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
9564f53af40SRichard Tran Mills   }
9574f53af40SRichard Tran Mills   csrA = a->csrA;
9584f53af40SRichard Tran Mills   csrP = p->csrA;
9594f53af40SRichard Tran Mills   csrC = c->csrA;
960b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
961190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
962b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
9634f53af40SRichard Tran Mills 
964f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
965b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
966*db04c2a0SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
9674f53af40SRichard Tran Mills 
968190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
969190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
970190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
971190ae7a4SRichard Tran Mills    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
972190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
973190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
974190ae7a4SRichard Tran Mills    * the full matrix. */
9754f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
976190ae7a4SRichard Tran Mills   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
977190ae7a4SRichard Tran Mills   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
978190ae7a4SRichard Tran Mills   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
979190ae7a4SRichard Tran Mills   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
980190ae7a4SRichard Tran Mills   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
981190ae7a4SRichard Tran Mills   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
982190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
983190ae7a4SRichard Tran Mills   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
9841495fedeSRichard Tran Mills   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
985190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
986190ae7a4SRichard Tran Mills   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
987190ae7a4SRichard Tran Mills   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
9884f53af40SRichard Tran Mills   PetscFunctionReturn(0);
9894f53af40SRichard Tran Mills }
990190ae7a4SRichard Tran Mills 
991190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
992190ae7a4SRichard Tran Mills {
993190ae7a4SRichard Tran Mills   Mat_Product         *product = C->product;
994190ae7a4SRichard Tran Mills   Mat                 A = product->A,P = product->B;
9951495fedeSRichard Tran Mills   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
996190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA,csrP,csrC;
997190ae7a4SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
998190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
999190ae7a4SRichard Tran Mills   PetscObjectState    state;
1000190ae7a4SRichard Tran Mills   PetscErrorCode      ierr;
1001190ae7a4SRichard Tran Mills 
1002190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1003190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1004190ae7a4SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
1005190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
1006190ae7a4SRichard Tran Mills   }
1007190ae7a4SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1008190ae7a4SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
1009190ae7a4SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
1010190ae7a4SRichard Tran Mills   }
1011190ae7a4SRichard Tran Mills   csrA = a->csrA;
1012190ae7a4SRichard Tran Mills   csrP = p->csrA;
1013190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1014190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1015190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1016190ae7a4SRichard Tran Mills 
1017190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1018190ae7a4SRichard Tran Mills   if (csrP && csrA) {
1019190ae7a4SRichard Tran Mills     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1020*db04c2a0SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1021190ae7a4SRichard Tran Mills   } else {
1022190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
1023190ae7a4SRichard Tran Mills   }
1024190ae7a4SRichard Tran Mills 
1025190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1026190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
102749ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
102849ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
102949ba5396SRichard Tran Mills    * is guaranteed. */
1030190ae7a4SRichard Tran Mills   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1031190ae7a4SRichard Tran Mills 
1032190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
1033190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1034190ae7a4SRichard Tran Mills }
1035190ae7a4SRichard Tran Mills 
1036190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1037190ae7a4SRichard Tran Mills {
1038190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1039190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
1040190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1041190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1042190ae7a4SRichard Tran Mills }
1043190ae7a4SRichard Tran Mills 
1044190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1045190ae7a4SRichard Tran Mills {
1046190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1047190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1048190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1049190ae7a4SRichard Tran Mills }
1050190ae7a4SRichard Tran Mills 
1051190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1052190ae7a4SRichard Tran Mills {
1053190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1054190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1055190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1056190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1057190ae7a4SRichard Tran Mills }
1058190ae7a4SRichard Tran Mills 
1059190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1060190ae7a4SRichard Tran Mills {
1061190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1062190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1063190ae7a4SRichard Tran Mills   Mat            A = product->A;
1064190ae7a4SRichard Tran Mills   PetscBool      set, flag;
1065190ae7a4SRichard Tran Mills 
1066190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1067190ae7a4SRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
1068190ae7a4SRichard Tran Mills   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1069190ae7a4SRichard Tran Mills    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1070190ae7a4SRichard Tran Mills    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1071190ae7a4SRichard Tran Mills    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1072190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL;
1073190ae7a4SRichard Tran Mills #else
1074190ae7a4SRichard Tran Mills   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1075190ae7a4SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1076190ae7a4SRichard Tran Mills   if (set && flag) {
1077190ae7a4SRichard Tran Mills     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1078190ae7a4SRichard Tran Mills     PetscFunctionReturn(0);
1079190ae7a4SRichard Tran Mills   } else {
1080190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1081190ae7a4SRichard Tran Mills   }
10821495fedeSRichard Tran Mills   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1083190ae7a4SRichard Tran Mills    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1084190ae7a4SRichard Tran Mills #endif
1085190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1086190ae7a4SRichard Tran Mills }
1087190ae7a4SRichard Tran Mills 
1088190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1089190ae7a4SRichard Tran Mills {
1090190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1091190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1092190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1093190ae7a4SRichard Tran Mills }
1094190ae7a4SRichard Tran Mills 
1095190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1096190ae7a4SRichard Tran Mills {
1097190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1098190ae7a4SRichard Tran Mills   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1099190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1100190ae7a4SRichard Tran Mills }
1101190ae7a4SRichard Tran Mills 
1102190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1103190ae7a4SRichard Tran Mills {
1104190ae7a4SRichard Tran Mills   PetscErrorCode ierr;
1105190ae7a4SRichard Tran Mills   Mat_Product    *product = C->product;
1106190ae7a4SRichard Tran Mills 
1107190ae7a4SRichard Tran Mills   PetscFunctionBegin;
1108190ae7a4SRichard Tran Mills   switch (product->type) {
1109190ae7a4SRichard Tran Mills   case MATPRODUCT_AB:
1110190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1111190ae7a4SRichard Tran Mills     break;
1112190ae7a4SRichard Tran Mills   case MATPRODUCT_AtB:
1113190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1114190ae7a4SRichard Tran Mills     break;
1115190ae7a4SRichard Tran Mills   case MATPRODUCT_ABt:
1116190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1117190ae7a4SRichard Tran Mills     break;
1118190ae7a4SRichard Tran Mills   case MATPRODUCT_PtAP:
1119190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1120190ae7a4SRichard Tran Mills     break;
1121190ae7a4SRichard Tran Mills   case MATPRODUCT_RARt:
1122190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1123190ae7a4SRichard Tran Mills     break;
1124190ae7a4SRichard Tran Mills   case MATPRODUCT_ABC:
1125190ae7a4SRichard Tran Mills     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1126190ae7a4SRichard Tran Mills     break;
1127190ae7a4SRichard Tran Mills   default:
1128190ae7a4SRichard Tran Mills     break;
1129190ae7a4SRichard Tran Mills   }
1130190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
1131190ae7a4SRichard Tran Mills }
1132431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1133190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
11344f53af40SRichard Tran Mills 
11354a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1136510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
11374a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
11384a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
11394a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
11404a2a386eSRichard Tran Mills {
11414a2a386eSRichard Tran Mills   PetscErrorCode ierr;
11424a2a386eSRichard Tran Mills   Mat            B = *newmat;
11434a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
1144c9d46305SRichard Tran Mills   PetscBool      set;
1145e9c94282SRichard Tran Mills   PetscBool      sametype;
11464a2a386eSRichard Tran Mills 
11474a2a386eSRichard Tran Mills   PetscFunctionBegin;
11484a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
11494a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
11504a2a386eSRichard Tran Mills   }
11514a2a386eSRichard Tran Mills 
1152e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1153e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1154e9c94282SRichard Tran Mills 
11554a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
11564a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
11574a2a386eSRichard Tran Mills 
1158df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1159969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
11604a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
11614a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
11624a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1163c9d46305SRichard Tran Mills 
11644abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1165ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1166d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1167a8327b06SKarl Rupp #else
1168d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1169d995685eSRichard Tran Mills #endif
11705b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
11714abfa3b3SRichard Tran Mills 
11724abfa3b3SRichard Tran Mills   /* Parse command line options. */
1173c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
117448292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
117548292275SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1176c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1177ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1178d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1179d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1180d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1181d995685eSRichard Tran Mills   }
1182d995685eSRichard Tran Mills #endif
1183c9d46305SRichard Tran Mills 
1184ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1185df555b71SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1186969800c5SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1187df555b71SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1188969800c5SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
11898a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1190190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1191190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1192190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1194190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1195ffcab697SRichard Tran Mills #   if !defined(PETSC_USE_COMPLEX)
119649ba5396SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1197190ae7a4SRichard Tran Mills #   else
1198190ae7a4SRichard Tran Mills   B->ops->ptapnumeric             = NULL;
11994f53af40SRichard Tran Mills #   endif
1200e8be1fc7SRichard Tran Mills # endif
12011950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
12021950a7e7SRichard Tran Mills 
1203213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1204213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1205213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1206213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1207213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
12081950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
12094a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1210969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
12114a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1212969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1213c9d46305SRichard Tran Mills   }
12141950a7e7SRichard Tran Mills #endif
12154a2a386eSRichard Tran Mills 
12164a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
12174a2a386eSRichard Tran Mills 
1218190ae7a4SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
1219190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1220190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1221190ae7a4SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1222190ae7a4SRichard Tran Mills #endif
1223190ae7a4SRichard Tran Mills #endif
1224190ae7a4SRichard Tran Mills   }
1225190ae7a4SRichard Tran Mills 
12264a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
12274a2a386eSRichard Tran Mills   *newmat = B;
12284a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12294a2a386eSRichard Tran Mills }
12304a2a386eSRichard Tran Mills 
12314a2a386eSRichard Tran Mills /*@C
12324a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
12334a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
12344a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
123590147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
123690147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1237597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1238597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1239597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
124090147e49SRichard Tran Mills 
1241d083f849SBarry Smith    Collective
12424a2a386eSRichard Tran Mills 
12434a2a386eSRichard Tran Mills    Input Parameters:
12444a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
12454a2a386eSRichard Tran Mills .  m - number of rows
12464a2a386eSRichard Tran Mills .  n - number of columns
12474a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
12484a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
12494a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
12504a2a386eSRichard Tran Mills 
12514a2a386eSRichard Tran Mills    Output Parameter:
12524a2a386eSRichard Tran Mills .  A - the matrix
12534a2a386eSRichard Tran Mills 
125490147e49SRichard Tran Mills    Options Database Keys:
125566b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
125666b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
125790147e49SRichard Tran Mills 
12584a2a386eSRichard Tran Mills    Notes:
12594a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
12604a2a386eSRichard Tran Mills 
12614a2a386eSRichard Tran Mills    Level: intermediate
12624a2a386eSRichard Tran Mills 
12634a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
12644a2a386eSRichard Tran Mills @*/
12654a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
12664a2a386eSRichard Tran Mills {
12674a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12684a2a386eSRichard Tran Mills 
12694a2a386eSRichard Tran Mills   PetscFunctionBegin;
12704a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
12714a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
12724a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
12734a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
12744a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12754a2a386eSRichard Tran Mills }
12764a2a386eSRichard Tran Mills 
12774a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
12784a2a386eSRichard Tran Mills {
12794a2a386eSRichard Tran Mills   PetscErrorCode ierr;
12804a2a386eSRichard Tran Mills 
12814a2a386eSRichard Tran Mills   PetscFunctionBegin;
12824a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
12834a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
12844a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
12854a2a386eSRichard Tran Mills }
1286