xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision d96e85fed5c8182a229394a8f92211a7d60de233)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
174abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20df555b71SRichard Tran Mills   struct matrix_descr descr;
21b8cbc1fbSRichard Tran Mills #endif
224a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
234a2a386eSRichard Tran Mills 
244a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
254a2a386eSRichard Tran Mills 
264a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
274a2a386eSRichard Tran Mills {
284a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
294a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
304a2a386eSRichard Tran Mills   PetscErrorCode ierr;
314a2a386eSRichard Tran Mills   Mat            B       = *newmat;
324a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
334a2a386eSRichard Tran Mills 
344a2a386eSRichard Tran Mills   PetscFunctionBegin;
354a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
364a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
37e9c94282SRichard Tran Mills     aijmkl = (Mat_SeqAIJMKL*)B->spptr;
384a2a386eSRichard Tran Mills   }
394a2a386eSRichard Tran Mills 
404a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4154871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
434a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4454871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
45ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
4887c2a1d7SRichard Tran Mills   B->ops->scale            = MatScale_SeqAIJ;
4987c2a1d7SRichard Tran Mills   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
5087c2a1d7SRichard Tran Mills   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
5187c2a1d7SRichard Tran Mills   B->ops->axpy             = MatAXPY_SeqAIJ;
524a2a386eSRichard Tran Mills 
53e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
54e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
55e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
56e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
57e9c94282SRichard Tran Mills 
584abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
59e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
60e9c94282SRichard Tran Mills    * the spptr pointer. */
614abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
624abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
630632b357SRichard Tran Mills     sparse_status_t stat;
644abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
654abfa3b3SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
664abfa3b3SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
674abfa3b3SRichard Tran Mills     }
684abfa3b3SRichard Tran Mills   }
694abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
70e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
714a2a386eSRichard Tran Mills 
724a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
734a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
744a2a386eSRichard Tran Mills 
754a2a386eSRichard Tran Mills   *newmat = B;
764a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
774a2a386eSRichard Tran Mills }
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
804a2a386eSRichard Tran Mills {
814a2a386eSRichard Tran Mills   PetscErrorCode ierr;
824a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
834a2a386eSRichard Tran Mills 
844a2a386eSRichard Tran Mills   PetscFunctionBegin;
85e9c94282SRichard Tran Mills 
86e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
87e9c94282SRichard Tran Mills    * spptr pointer. */
88e9c94282SRichard Tran Mills   if (aijmkl) {
894a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
904abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
914abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
924abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
934abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
944abfa3b3SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) {
954abfa3b3SRichard Tran Mills         PetscFunctionReturn(PETSC_ERR_LIB);
964abfa3b3SRichard Tran Mills       }
974abfa3b3SRichard Tran Mills     }
984abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
994a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
100e9c94282SRichard Tran Mills   }
1014a2a386eSRichard Tran Mills 
1024a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1034a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1044a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1054a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1064a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1074a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1084a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1094a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1104a2a386eSRichard Tran Mills }
1114a2a386eSRichard Tran Mills 
1126e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1134a2a386eSRichard Tran Mills {
1144a2a386eSRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
115df555b71SRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
11658678438SRichard Tran Mills   PetscInt        m,n;
1176e369cd5SRichard Tran Mills   MatScalar       *aa;
118df555b71SRichard Tran Mills   PetscInt        *aj,*ai;
1194a2a386eSRichard Tran Mills 
1206e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1216e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1226e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1236e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
1246e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1256e369cd5SRichard Tran Mills #else
1264a2a386eSRichard Tran Mills 
1276e369cd5SRichard Tran Mills   sparse_status_t stat;
1284a2a386eSRichard Tran Mills 
129df555b71SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
1306e369cd5SRichard Tran Mills 
1316e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1326e369cd5SRichard Tran Mills 
1330632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1340632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1350632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1360632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1370632b357SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
1380632b357SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
1390632b357SRichard Tran Mills     }
1400632b357SRichard Tran Mills   }
1418d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1426e369cd5SRichard Tran Mills 
143c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
144df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
145df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
146df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
14758678438SRichard Tran Mills   m = A->rmap->n;
14858678438SRichard Tran Mills   n = A->cmap->n;
149df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
150df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
151df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1528d3fe1b0SRichard Tran Mills   if (a->nz) {
1538d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1548d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
15558678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
156df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
157df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
158df555b71SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
159df555b71SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
160f68ad4bdSRichard Tran Mills       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
161df555b71SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
162df555b71SRichard Tran Mills     }
1634abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
164c9d46305SRichard Tran Mills   }
1656e369cd5SRichard Tran Mills 
1666e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
167d995685eSRichard Tran Mills #endif
1686e369cd5SRichard Tran Mills }
1696e369cd5SRichard Tran Mills 
1706e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
1716e369cd5SRichard Tran Mills {
1726e369cd5SRichard Tran Mills   PetscErrorCode ierr;
1736e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
1746e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
1756e369cd5SRichard Tran Mills 
1766e369cd5SRichard Tran Mills   PetscFunctionBegin;
1776e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
1786e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
1796e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
1806e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
1816e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
1826e369cd5SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
1836e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1846e369cd5SRichard Tran Mills }
1856e369cd5SRichard Tran Mills 
1866e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
1876e369cd5SRichard Tran Mills {
1886e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
1896e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1906e369cd5SRichard Tran Mills 
1916e369cd5SRichard Tran Mills   PetscFunctionBegin;
1926e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1936e369cd5SRichard Tran Mills 
1946e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
1956e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
1966e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
1976e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
198*d96e85feSRichard Tran Mills    * a lot of code duplication. */
1996e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
2006e369cd5SRichard Tran Mills   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
2016e369cd5SRichard Tran Mills 
2026e369cd5SRichard Tran Mills   /* Now create the MKL sparse handle (if needed; the function checks). */
2036e369cd5SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
204df555b71SRichard Tran Mills 
2054a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2064a2a386eSRichard Tran Mills }
2074a2a386eSRichard Tran Mills 
2084a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
2094a2a386eSRichard Tran Mills {
2104a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
2114a2a386eSRichard Tran Mills   const PetscScalar *x;
2124a2a386eSRichard Tran Mills   PetscScalar       *y;
2134a2a386eSRichard Tran Mills   const MatScalar   *aa;
2144a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
2154a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
216db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
217db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
218db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
2194a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
220db63039fSRichard Tran Mills   char              matdescra[6];
221db63039fSRichard Tran Mills 
2224a2a386eSRichard Tran Mills 
2234a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
224ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
225ff03dc53SRichard Tran Mills 
226ff03dc53SRichard Tran Mills   PetscFunctionBegin;
227db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
228db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
229ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
230ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
231ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
232ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
233ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
234ff03dc53SRichard Tran Mills 
235ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
236db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
237ff03dc53SRichard Tran Mills 
238ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
239ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
240ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
241ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
242ff03dc53SRichard Tran Mills }
243ff03dc53SRichard Tran Mills 
244d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
245df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
246df555b71SRichard Tran Mills {
247df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
248df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
249df555b71SRichard Tran Mills   const PetscScalar *x;
250df555b71SRichard Tran Mills   PetscScalar       *y;
251df555b71SRichard Tran Mills   PetscErrorCode    ierr;
252df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
253df555b71SRichard Tran Mills 
254df555b71SRichard Tran Mills   PetscFunctionBegin;
255df555b71SRichard Tran Mills 
2568d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
2578d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
258f36dfe3fSRichard Tran Mills 
259df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
260df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
261df555b71SRichard Tran Mills 
2623fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2633fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2643fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
2653fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
2663fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
2673fa15762SRichard Tran Mills   }
2683fa15762SRichard Tran Mills 
269df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
270df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
271df555b71SRichard Tran Mills 
272df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
273df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
274df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
275df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
276df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
277df555b71SRichard Tran Mills   }
278df555b71SRichard Tran Mills   PetscFunctionReturn(0);
279df555b71SRichard Tran Mills }
280d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
281df555b71SRichard Tran Mills 
282ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
283ff03dc53SRichard Tran Mills {
284ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
285ff03dc53SRichard Tran Mills   const PetscScalar *x;
286ff03dc53SRichard Tran Mills   PetscScalar       *y;
287ff03dc53SRichard Tran Mills   const MatScalar   *aa;
288ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
289ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
290db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
291db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
292db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
293ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
294db63039fSRichard Tran Mills   char              matdescra[6];
295ff03dc53SRichard Tran Mills 
296ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
297ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
2984a2a386eSRichard Tran Mills 
2994a2a386eSRichard Tran Mills   PetscFunctionBegin;
300969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
301969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
3024a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3034a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
3044a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
3054a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
3064a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
3074a2a386eSRichard Tran Mills 
3084a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
309db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
3104a2a386eSRichard Tran Mills 
3114a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
3124a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3134a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
3144a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3154a2a386eSRichard Tran Mills }
3164a2a386eSRichard Tran Mills 
317d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
318df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
319df555b71SRichard Tran Mills {
320df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
321df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
322df555b71SRichard Tran Mills   const PetscScalar *x;
323df555b71SRichard Tran Mills   PetscScalar       *y;
324df555b71SRichard Tran Mills   PetscErrorCode    ierr;
3250632b357SRichard Tran Mills   sparse_status_t   stat;
326df555b71SRichard Tran Mills 
327df555b71SRichard Tran Mills   PetscFunctionBegin;
328df555b71SRichard Tran Mills 
3298d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
3308d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
331f36dfe3fSRichard Tran Mills 
332df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
333df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
334df555b71SRichard Tran Mills 
3353fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3363fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3373fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3383fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
3393fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3403fa15762SRichard Tran Mills   }
3413fa15762SRichard Tran Mills 
342df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
343df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
344df555b71SRichard Tran Mills 
345df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
346df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
347df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
348df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
349df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
350df555b71SRichard Tran Mills   }
351df555b71SRichard Tran Mills   PetscFunctionReturn(0);
352df555b71SRichard Tran Mills }
353d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
354df555b71SRichard Tran Mills 
3554a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
3564a2a386eSRichard Tran Mills {
3574a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3584a2a386eSRichard Tran Mills   const PetscScalar *x;
3594a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
3604a2a386eSRichard Tran Mills   const MatScalar   *aa;
3614a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3624a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
363db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
3644a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
3654a2a386eSRichard Tran Mills   PetscInt          i;
3664a2a386eSRichard Tran Mills 
367ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
368ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
369a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
370db63039fSRichard Tran Mills   PetscScalar       beta;
371a84739b8SRichard Tran Mills   char              matdescra[6];
372ff03dc53SRichard Tran Mills 
373ff03dc53SRichard Tran Mills   PetscFunctionBegin;
374a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
375a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
376a84739b8SRichard Tran Mills 
377ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
378ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
379ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
380ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
381ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
382ff03dc53SRichard Tran Mills 
383ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
384a84739b8SRichard Tran Mills   if (zz == yy) {
385a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
386db63039fSRichard Tran Mills     beta = 1.0;
387db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
388a84739b8SRichard Tran Mills   } else {
389db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
390db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
391db63039fSRichard Tran Mills     beta = 0.0;
392db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
393ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
394ff03dc53SRichard Tran Mills       z[i] += y[i];
395ff03dc53SRichard Tran Mills     }
396a84739b8SRichard Tran Mills   }
397ff03dc53SRichard Tran Mills 
398ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
399ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
400ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
401ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
402ff03dc53SRichard Tran Mills }
403ff03dc53SRichard Tran Mills 
404d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
405df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
406df555b71SRichard Tran Mills {
407df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
408df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
409df555b71SRichard Tran Mills   const PetscScalar *x;
410df555b71SRichard Tran Mills   PetscScalar       *y,*z;
411df555b71SRichard Tran Mills   PetscErrorCode    ierr;
412df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
413df555b71SRichard Tran Mills   PetscInt          i;
414df555b71SRichard Tran Mills 
415df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
416df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
417df555b71SRichard Tran Mills 
418df555b71SRichard Tran Mills   PetscFunctionBegin;
419df555b71SRichard Tran Mills 
4208d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
4218d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
422df555b71SRichard Tran Mills 
423df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
424df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
425df555b71SRichard Tran Mills 
4263fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4273fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4283fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4293fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
4303fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4313fa15762SRichard Tran Mills   }
4323fa15762SRichard Tran Mills 
433df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
434df555b71SRichard Tran Mills   if (zz == yy) {
435df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
436df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
437db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
438df555b71SRichard Tran Mills   } else {
439df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
440df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
441db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
442df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
443df555b71SRichard Tran Mills       z[i] += y[i];
444df555b71SRichard Tran Mills     }
445df555b71SRichard Tran Mills   }
446df555b71SRichard Tran Mills 
447df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
448df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
449df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
450df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
451df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
452df555b71SRichard Tran Mills   }
453df555b71SRichard Tran Mills   PetscFunctionReturn(0);
454df555b71SRichard Tran Mills }
455d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
456df555b71SRichard Tran Mills 
457ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
458ff03dc53SRichard Tran Mills {
459ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
460ff03dc53SRichard Tran Mills   const PetscScalar *x;
461ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
462ff03dc53SRichard Tran Mills   const MatScalar   *aa;
463ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
464ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
465db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
466ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
467ff03dc53SRichard Tran Mills   PetscInt          i;
468ff03dc53SRichard Tran Mills 
469ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
470ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
471a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
472db63039fSRichard Tran Mills   PetscScalar       beta;
473a84739b8SRichard Tran Mills   char              matdescra[6];
4744a2a386eSRichard Tran Mills 
4754a2a386eSRichard Tran Mills   PetscFunctionBegin;
476a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
477a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
478a84739b8SRichard Tran Mills 
4794a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4804a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
4814a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4824a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4834a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4844a2a386eSRichard Tran Mills 
4854a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
486a84739b8SRichard Tran Mills   if (zz == yy) {
487a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
488db63039fSRichard Tran Mills     beta = 1.0;
489969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
490a84739b8SRichard Tran Mills   } else {
491db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
492db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
493db63039fSRichard Tran Mills     beta = 0.0;
494db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
495969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
4964a2a386eSRichard Tran Mills       z[i] += y[i];
4974a2a386eSRichard Tran Mills     }
498a84739b8SRichard Tran Mills   }
4994a2a386eSRichard Tran Mills 
5004a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
5014a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5024a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
5034a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
5044a2a386eSRichard Tran Mills }
5054a2a386eSRichard Tran Mills 
506d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
507df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
508df555b71SRichard Tran Mills {
509df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
510df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
511df555b71SRichard Tran Mills   const PetscScalar *x;
512df555b71SRichard Tran Mills   PetscScalar       *y,*z;
513df555b71SRichard Tran Mills   PetscErrorCode    ierr;
514969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
515df555b71SRichard Tran Mills   PetscInt          i;
516df555b71SRichard Tran Mills 
517df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
518df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
519df555b71SRichard Tran Mills 
520df555b71SRichard Tran Mills   PetscFunctionBegin;
521df555b71SRichard Tran Mills 
5228d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
5238d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
524f36dfe3fSRichard Tran Mills 
525df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
527df555b71SRichard Tran Mills 
5283fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5293fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5303fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5313fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
5323fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5333fa15762SRichard Tran Mills   }
5343fa15762SRichard Tran Mills 
535df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
536df555b71SRichard Tran Mills   if (zz == yy) {
537df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
538df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
539db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
540df555b71SRichard Tran Mills   } else {
541df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
542df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
543db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
544969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
545df555b71SRichard Tran Mills       z[i] += y[i];
546df555b71SRichard Tran Mills     }
547df555b71SRichard Tran Mills   }
548df555b71SRichard Tran Mills 
549df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
550df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
551df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
552df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
553df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
554df555b71SRichard Tran Mills   }
555df555b71SRichard Tran Mills   PetscFunctionReturn(0);
556df555b71SRichard Tran Mills }
557d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
558df555b71SRichard Tran Mills 
55987c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
560db63039fSRichard Tran Mills {
561db63039fSRichard Tran Mills   PetscErrorCode ierr;
562db63039fSRichard Tran Mills 
56387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
564db63039fSRichard Tran Mills   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
565db63039fSRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
566db63039fSRichard Tran Mills   PetscFunctionReturn(0);
567db63039fSRichard Tran Mills }
568df555b71SRichard Tran Mills 
56987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
57087c2a1d7SRichard Tran Mills {
57187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
57287c2a1d7SRichard Tran Mills 
57387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
57487c2a1d7SRichard Tran Mills   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
57587c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
57687c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
57787c2a1d7SRichard Tran Mills }
57887c2a1d7SRichard Tran Mills 
57987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
58087c2a1d7SRichard Tran Mills {
58187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
58287c2a1d7SRichard Tran Mills 
58387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
58487c2a1d7SRichard Tran Mills   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
58587c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
58687c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
58787c2a1d7SRichard Tran Mills }
58887c2a1d7SRichard Tran Mills 
58987c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
59087c2a1d7SRichard Tran Mills {
59187c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
59287c2a1d7SRichard Tran Mills 
59387c2a1d7SRichard Tran Mills   PetscFunctionBegin;
59487c2a1d7SRichard Tran Mills   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
59587c2a1d7SRichard Tran Mills   if (str == SAME_NONZERO_PATTERN) {
59687c2a1d7SRichard Tran Mills     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
59787c2a1d7SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
59887c2a1d7SRichard Tran Mills   }
59987c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
60087c2a1d7SRichard Tran Mills }
60187c2a1d7SRichard Tran Mills 
6024a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
6034a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
6044a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
6054a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
6064a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
6074a2a386eSRichard Tran Mills {
6084a2a386eSRichard Tran Mills   PetscErrorCode ierr;
6094a2a386eSRichard Tran Mills   Mat            B = *newmat;
6104a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
611c9d46305SRichard Tran Mills   PetscBool      set;
612e9c94282SRichard Tran Mills   PetscBool      sametype;
6134a2a386eSRichard Tran Mills 
6144a2a386eSRichard Tran Mills   PetscFunctionBegin;
6154a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
6164a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
6174a2a386eSRichard Tran Mills   }
6184a2a386eSRichard Tran Mills 
619e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
620e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
621e9c94282SRichard Tran Mills 
6224a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
6234a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
6244a2a386eSRichard Tran Mills 
625df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
626969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
6274a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
6284a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
6294a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
630c9d46305SRichard Tran Mills 
6314abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
632d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
633d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
634d995685eSRichard Tran Mills #elif
635d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
636d995685eSRichard Tran Mills #endif
6374abfa3b3SRichard Tran Mills 
6384abfa3b3SRichard Tran Mills   /* Parse command line options. */
639c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
640c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
641c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
642d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
643d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
644d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
645d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
646d995685eSRichard Tran Mills   }
647d995685eSRichard Tran Mills #endif
648c9d46305SRichard Tran Mills 
649c9d46305SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
650d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
651df555b71SRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
652969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
653df555b71SRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
654969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
655d995685eSRichard Tran Mills #endif
656c9d46305SRichard Tran Mills   } else {
6574a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
658969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
6594a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
660969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
661c9d46305SRichard Tran Mills   }
6624a2a386eSRichard Tran Mills 
663db63039fSRichard Tran Mills   B->ops->scale              = MatScale_SeqAIJMKL;
66487c2a1d7SRichard Tran Mills   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
66587c2a1d7SRichard Tran Mills   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
66687c2a1d7SRichard Tran Mills   B->ops->axpy               = MatAXPY_SeqAIJMKL;
667db63039fSRichard Tran Mills 
668db63039fSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
6694a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
670e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
671e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
672e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
6734a2a386eSRichard Tran Mills 
6744a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
6754a2a386eSRichard Tran Mills   *newmat = B;
6764a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6774a2a386eSRichard Tran Mills }
6784a2a386eSRichard Tran Mills 
6794a2a386eSRichard Tran Mills /*@C
6804a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
6814a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
6824a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
68390147e49SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
68490147e49SRichard Tran Mills    operations are currently supported.
68590147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
68690147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
68790147e49SRichard Tran Mills 
6884a2a386eSRichard Tran Mills    Collective on MPI_Comm
6894a2a386eSRichard Tran Mills 
6904a2a386eSRichard Tran Mills    Input Parameters:
6914a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
6924a2a386eSRichard Tran Mills .  m - number of rows
6934a2a386eSRichard Tran Mills .  n - number of columns
6944a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
6954a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
6964a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
6974a2a386eSRichard Tran Mills 
6984a2a386eSRichard Tran Mills    Output Parameter:
6994a2a386eSRichard Tran Mills .  A - the matrix
7004a2a386eSRichard Tran Mills 
70190147e49SRichard Tran Mills    Options Database Keys:
70290147e49SRichard Tran Mills .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
70390147e49SRichard Tran Mills 
7044a2a386eSRichard Tran Mills    Notes:
7054a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
7064a2a386eSRichard Tran Mills 
7074a2a386eSRichard Tran Mills    Level: intermediate
7084a2a386eSRichard Tran Mills 
70990147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel
7104a2a386eSRichard Tran Mills 
7114a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
7124a2a386eSRichard Tran Mills @*/
7134a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
7144a2a386eSRichard Tran Mills {
7154a2a386eSRichard Tran Mills   PetscErrorCode ierr;
7164a2a386eSRichard Tran Mills 
7174a2a386eSRichard Tran Mills   PetscFunctionBegin;
7184a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
7194a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
7204a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
7214a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
7224a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7234a2a386eSRichard Tran Mills }
7244a2a386eSRichard Tran Mills 
7254a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
7264a2a386eSRichard Tran Mills {
7274a2a386eSRichard Tran Mills   PetscErrorCode ierr;
7284a2a386eSRichard Tran Mills 
7294a2a386eSRichard Tran Mills   PetscFunctionBegin;
7304a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
7314a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
7324a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7334a2a386eSRichard Tran Mills }
734