xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 87c2a1d7fa67ebe3ae3506c155f9d34b3f33b963)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
174abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20df555b71SRichard Tran Mills   struct matrix_descr descr;
21b8cbc1fbSRichard Tran Mills #endif
224a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
234a2a386eSRichard Tran Mills 
244a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
254a2a386eSRichard Tran Mills 
264a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
274a2a386eSRichard Tran Mills {
284a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
294a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
304a2a386eSRichard Tran Mills   PetscErrorCode ierr;
314a2a386eSRichard Tran Mills   Mat            B       = *newmat;
324a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
334a2a386eSRichard Tran Mills 
344a2a386eSRichard Tran Mills   PetscFunctionBegin;
354a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
364a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
37e9c94282SRichard Tran Mills     aijmkl = (Mat_SeqAIJMKL*)B->spptr;
384a2a386eSRichard Tran Mills   }
394a2a386eSRichard Tran Mills 
404a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4154871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
434a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4454871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
45ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
48*87c2a1d7SRichard Tran Mills   B->ops->scale            = MatScale_SeqAIJ;
49*87c2a1d7SRichard Tran Mills   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
50*87c2a1d7SRichard Tran Mills   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
51*87c2a1d7SRichard Tran Mills   B->ops->axpy             = MatAXPY_SeqAIJ;
524a2a386eSRichard Tran Mills 
53e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
54e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
55e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
56e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
57e9c94282SRichard Tran Mills 
584abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
59e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
60e9c94282SRichard Tran Mills    * the spptr pointer. */
614abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
624abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
630632b357SRichard Tran Mills     sparse_status_t stat;
644abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
654abfa3b3SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
664abfa3b3SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
674abfa3b3SRichard Tran Mills     }
684abfa3b3SRichard Tran Mills   }
694abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
70e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
714a2a386eSRichard Tran Mills 
724a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
734a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
744a2a386eSRichard Tran Mills 
754a2a386eSRichard Tran Mills   *newmat = B;
764a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
774a2a386eSRichard Tran Mills }
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
804a2a386eSRichard Tran Mills {
814a2a386eSRichard Tran Mills   PetscErrorCode ierr;
824a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
834a2a386eSRichard Tran Mills 
844a2a386eSRichard Tran Mills   PetscFunctionBegin;
85e9c94282SRichard Tran Mills 
86e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
87e9c94282SRichard Tran Mills    * spptr pointer. */
88e9c94282SRichard Tran Mills   if (aijmkl) {
894a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
904abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
914abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
924abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
934abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
944abfa3b3SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) {
954abfa3b3SRichard Tran Mills         PetscFunctionReturn(PETSC_ERR_LIB);
964abfa3b3SRichard Tran Mills       }
974abfa3b3SRichard Tran Mills     }
984abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
994a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
100e9c94282SRichard Tran Mills   }
1014a2a386eSRichard Tran Mills 
1024a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1034a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1044a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1054a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1064a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1074a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1084a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1094a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1104a2a386eSRichard Tran Mills }
1114a2a386eSRichard Tran Mills 
1126e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1134a2a386eSRichard Tran Mills {
1144a2a386eSRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
115df555b71SRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
11658678438SRichard Tran Mills   PetscInt        m,n;
1176e369cd5SRichard Tran Mills   MatScalar       *aa;
118df555b71SRichard Tran Mills   PetscInt        *aj,*ai;
1194a2a386eSRichard Tran Mills 
1206e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1216e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1226e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1236e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
1246e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1256e369cd5SRichard Tran Mills #else
1264a2a386eSRichard Tran Mills 
1276e369cd5SRichard Tran Mills   sparse_status_t stat;
1284a2a386eSRichard Tran Mills 
129df555b71SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
1306e369cd5SRichard Tran Mills 
1316e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1326e369cd5SRichard Tran Mills 
1330632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1340632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1350632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1360632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1370632b357SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
1380632b357SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
1390632b357SRichard Tran Mills     }
1400632b357SRichard Tran Mills   }
1418d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1426e369cd5SRichard Tran Mills 
143c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
144df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
145df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
146df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
14758678438SRichard Tran Mills   m = A->rmap->n;
14858678438SRichard Tran Mills   n = A->cmap->n;
149df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
150df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
151df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1528d3fe1b0SRichard Tran Mills   if (a->nz) {
1538d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1548d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
15558678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
156df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
157df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
158df555b71SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
159df555b71SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
160f68ad4bdSRichard Tran Mills       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
161df555b71SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
162df555b71SRichard Tran Mills     }
1634abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
164c9d46305SRichard Tran Mills   }
1656e369cd5SRichard Tran Mills 
1666e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
167d995685eSRichard Tran Mills #endif
1686e369cd5SRichard Tran Mills }
1696e369cd5SRichard Tran Mills 
1706e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
1716e369cd5SRichard Tran Mills {
1726e369cd5SRichard Tran Mills   PetscErrorCode ierr;
1736e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
1746e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
1756e369cd5SRichard Tran Mills 
1766e369cd5SRichard Tran Mills   PetscFunctionBegin;
1776e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
1786e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
1796e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
1806e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
1816e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
1826e369cd5SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
1836e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1846e369cd5SRichard Tran Mills }
1856e369cd5SRichard Tran Mills 
1866e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
1876e369cd5SRichard Tran Mills {
1886e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
1896e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1906e369cd5SRichard Tran Mills 
1916e369cd5SRichard Tran Mills   PetscFunctionBegin;
1926e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1936e369cd5SRichard Tran Mills 
1946e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
1956e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
1966e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
1976e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
1986e369cd5SRichard Tran Mills    * a lot of code duplication.
1996e369cd5SRichard Tran Mills    * I also note that currently MATSEQAIJMKL doesn't know anything about
2006e369cd5SRichard Tran Mills    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
2016e369cd5SRichard Tran Mills    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
2026e369cd5SRichard Tran Mills    * this, this may break things.  (Don't know... haven't looked at it.
2036e369cd5SRichard Tran Mills    * Do I need to disable this somehow?) */
2046e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
2056e369cd5SRichard Tran Mills   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
2066e369cd5SRichard Tran Mills 
2076e369cd5SRichard Tran Mills   /* Now create the MKL sparse handle (if needed; the function checks). */
2086e369cd5SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
209df555b71SRichard Tran Mills 
2104a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2114a2a386eSRichard Tran Mills }
2124a2a386eSRichard Tran Mills 
2134a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
2144a2a386eSRichard Tran Mills {
2154a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
2164a2a386eSRichard Tran Mills   const PetscScalar *x;
2174a2a386eSRichard Tran Mills   PetscScalar       *y;
2184a2a386eSRichard Tran Mills   const MatScalar   *aa;
2194a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
2204a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
221db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
222db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
223db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
2244a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
225db63039fSRichard Tran Mills   char              matdescra[6];
226db63039fSRichard Tran Mills 
2274a2a386eSRichard Tran Mills 
2284a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
229ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
230ff03dc53SRichard Tran Mills 
231ff03dc53SRichard Tran Mills   PetscFunctionBegin;
232db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
233db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
234ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
235ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
236ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
237ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
238ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
239ff03dc53SRichard Tran Mills 
240ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
241db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
242ff03dc53SRichard Tran Mills 
243ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
244ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
245ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
246ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
247ff03dc53SRichard Tran Mills }
248ff03dc53SRichard Tran Mills 
249d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
250df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
251df555b71SRichard Tran Mills {
252df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
253df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
254df555b71SRichard Tran Mills   const PetscScalar *x;
255df555b71SRichard Tran Mills   PetscScalar       *y;
256df555b71SRichard Tran Mills   PetscErrorCode    ierr;
257df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
258df555b71SRichard Tran Mills 
259df555b71SRichard Tran Mills   PetscFunctionBegin;
260df555b71SRichard Tran Mills 
2618d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
2628d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
263f36dfe3fSRichard Tran Mills 
264df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
265df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
266df555b71SRichard Tran Mills 
2673fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2683fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2693fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
2703fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
2713fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
2723fa15762SRichard Tran Mills   }
2733fa15762SRichard Tran Mills 
274df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
275df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
276df555b71SRichard Tran Mills 
277df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
278df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
279df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
280df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
281df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
282df555b71SRichard Tran Mills   }
283df555b71SRichard Tran Mills   PetscFunctionReturn(0);
284df555b71SRichard Tran Mills }
285d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
286df555b71SRichard Tran Mills 
287ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
288ff03dc53SRichard Tran Mills {
289ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
290ff03dc53SRichard Tran Mills   const PetscScalar *x;
291ff03dc53SRichard Tran Mills   PetscScalar       *y;
292ff03dc53SRichard Tran Mills   const MatScalar   *aa;
293ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
294ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
295db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
296db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
297db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
298ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
299db63039fSRichard Tran Mills   char              matdescra[6];
300ff03dc53SRichard Tran Mills 
301ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
302ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
3034a2a386eSRichard Tran Mills 
3044a2a386eSRichard Tran Mills   PetscFunctionBegin;
305969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
306969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
3074a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3084a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
3094a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
3104a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
3114a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
3124a2a386eSRichard Tran Mills 
3134a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
314db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
3154a2a386eSRichard Tran Mills 
3164a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
3174a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3184a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
3194a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3204a2a386eSRichard Tran Mills }
3214a2a386eSRichard Tran Mills 
322d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
323df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
324df555b71SRichard Tran Mills {
325df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
326df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
327df555b71SRichard Tran Mills   const PetscScalar *x;
328df555b71SRichard Tran Mills   PetscScalar       *y;
329df555b71SRichard Tran Mills   PetscErrorCode    ierr;
3300632b357SRichard Tran Mills   sparse_status_t   stat;
331df555b71SRichard Tran Mills 
332df555b71SRichard Tran Mills   PetscFunctionBegin;
333df555b71SRichard Tran Mills 
3348d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
3358d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
336f36dfe3fSRichard Tran Mills 
337df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
338df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
339df555b71SRichard Tran Mills 
3403fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3413fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3423fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3433fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
3443fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3453fa15762SRichard Tran Mills   }
3463fa15762SRichard Tran Mills 
347df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
348df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
349df555b71SRichard Tran Mills 
350df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
351df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
352df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
353df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
354df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
355df555b71SRichard Tran Mills   }
356df555b71SRichard Tran Mills   PetscFunctionReturn(0);
357df555b71SRichard Tran Mills }
358d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
359df555b71SRichard Tran Mills 
3604a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
3614a2a386eSRichard Tran Mills {
3624a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3634a2a386eSRichard Tran Mills   const PetscScalar *x;
3644a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
3654a2a386eSRichard Tran Mills   const MatScalar   *aa;
3664a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3674a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
368db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
3694a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
3704a2a386eSRichard Tran Mills   PetscInt          i;
3714a2a386eSRichard Tran Mills 
372ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
373ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
374a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
375db63039fSRichard Tran Mills   PetscScalar       beta;
376a84739b8SRichard Tran Mills   char              matdescra[6];
377ff03dc53SRichard Tran Mills 
378ff03dc53SRichard Tran Mills   PetscFunctionBegin;
379a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
380a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
381a84739b8SRichard Tran Mills 
382ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
383ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
384ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
385ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
386ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
387ff03dc53SRichard Tran Mills 
388ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
389a84739b8SRichard Tran Mills   if (zz == yy) {
390a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
391db63039fSRichard Tran Mills     beta = 1.0;
392db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
393a84739b8SRichard Tran Mills   } else {
394db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
395db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
396db63039fSRichard Tran Mills     beta = 0.0;
397db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
398ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
399ff03dc53SRichard Tran Mills       z[i] += y[i];
400ff03dc53SRichard Tran Mills     }
401a84739b8SRichard Tran Mills   }
402ff03dc53SRichard Tran Mills 
403ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
404ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
405ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
406ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
407ff03dc53SRichard Tran Mills }
408ff03dc53SRichard Tran Mills 
409d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
410df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
411df555b71SRichard Tran Mills {
412df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
413df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
414df555b71SRichard Tran Mills   const PetscScalar *x;
415df555b71SRichard Tran Mills   PetscScalar       *y,*z;
416df555b71SRichard Tran Mills   PetscErrorCode    ierr;
417df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
418df555b71SRichard Tran Mills   PetscInt          i;
419df555b71SRichard Tran Mills 
420df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
421df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
422df555b71SRichard Tran Mills 
423df555b71SRichard Tran Mills   PetscFunctionBegin;
424df555b71SRichard Tran Mills 
4258d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
4268d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
427df555b71SRichard Tran Mills 
428df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
429df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
430df555b71SRichard Tran Mills 
4313fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4323fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4333fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4343fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
4353fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4363fa15762SRichard Tran Mills   }
4373fa15762SRichard Tran Mills 
438df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
439df555b71SRichard Tran Mills   if (zz == yy) {
440df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
441df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
442db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
443df555b71SRichard Tran Mills   } else {
444df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
445df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
446db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
447df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
448df555b71SRichard Tran Mills       z[i] += y[i];
449df555b71SRichard Tran Mills     }
450df555b71SRichard Tran Mills   }
451df555b71SRichard Tran Mills 
452df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
453df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
454df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
455df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
456df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
457df555b71SRichard Tran Mills   }
458df555b71SRichard Tran Mills   PetscFunctionReturn(0);
459df555b71SRichard Tran Mills }
460d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
461df555b71SRichard Tran Mills 
462ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
463ff03dc53SRichard Tran Mills {
464ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
465ff03dc53SRichard Tran Mills   const PetscScalar *x;
466ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
467ff03dc53SRichard Tran Mills   const MatScalar   *aa;
468ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
469ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
470db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
471ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
472ff03dc53SRichard Tran Mills   PetscInt          i;
473ff03dc53SRichard Tran Mills 
474ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
475ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
476a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
477db63039fSRichard Tran Mills   PetscScalar       beta;
478a84739b8SRichard Tran Mills   char              matdescra[6];
4794a2a386eSRichard Tran Mills 
4804a2a386eSRichard Tran Mills   PetscFunctionBegin;
481a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
482a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
483a84739b8SRichard Tran Mills 
4844a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4854a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
4864a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4874a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4884a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4894a2a386eSRichard Tran Mills 
4904a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
491a84739b8SRichard Tran Mills   if (zz == yy) {
492a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
493db63039fSRichard Tran Mills     beta = 1.0;
494969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
495a84739b8SRichard Tran Mills   } else {
496db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
497db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
498db63039fSRichard Tran Mills     beta = 0.0;
499db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
500969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
5014a2a386eSRichard Tran Mills       z[i] += y[i];
5024a2a386eSRichard Tran Mills     }
503a84739b8SRichard Tran Mills   }
5044a2a386eSRichard Tran Mills 
5054a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
5064a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5074a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
5084a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
5094a2a386eSRichard Tran Mills }
5104a2a386eSRichard Tran Mills 
511d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
512df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
513df555b71SRichard Tran Mills {
514df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
515df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
516df555b71SRichard Tran Mills   const PetscScalar *x;
517df555b71SRichard Tran Mills   PetscScalar       *y,*z;
518df555b71SRichard Tran Mills   PetscErrorCode    ierr;
519969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
520df555b71SRichard Tran Mills   PetscInt          i;
521df555b71SRichard Tran Mills 
522df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
523df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
524df555b71SRichard Tran Mills 
525df555b71SRichard Tran Mills   PetscFunctionBegin;
526df555b71SRichard Tran Mills 
5278d3fe1b0SRichard Tran Mills   /* If there are no nonzero entries, this is a no-op: return immediately. */
5288d3fe1b0SRichard Tran Mills   if(!a->nz) PetscFunctionReturn(0);
529f36dfe3fSRichard Tran Mills 
530df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
531df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
532df555b71SRichard Tran Mills 
5333fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5343fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5353fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5363fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
5373fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5383fa15762SRichard Tran Mills   }
5393fa15762SRichard Tran Mills 
540df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
541df555b71SRichard Tran Mills   if (zz == yy) {
542df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
543df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
544db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
545df555b71SRichard Tran Mills   } else {
546df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
547df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
548db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
549969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
550df555b71SRichard Tran Mills       z[i] += y[i];
551df555b71SRichard Tran Mills     }
552df555b71SRichard Tran Mills   }
553df555b71SRichard Tran Mills 
554df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
555df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
556df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
557df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
558df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
559df555b71SRichard Tran Mills   }
560df555b71SRichard Tran Mills   PetscFunctionReturn(0);
561df555b71SRichard Tran Mills }
562d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
563df555b71SRichard Tran Mills 
564*87c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
565db63039fSRichard Tran Mills {
566db63039fSRichard Tran Mills   PetscErrorCode ierr;
567db63039fSRichard Tran Mills 
568*87c2a1d7SRichard Tran Mills   PetscFunctionBegin;
569db63039fSRichard Tran Mills   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
570db63039fSRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
571db63039fSRichard Tran Mills   PetscFunctionReturn(0);
572db63039fSRichard Tran Mills }
573df555b71SRichard Tran Mills 
574*87c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
575*87c2a1d7SRichard Tran Mills {
576*87c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
577*87c2a1d7SRichard Tran Mills 
578*87c2a1d7SRichard Tran Mills   PetscFunctionBegin;
579*87c2a1d7SRichard Tran Mills   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
580*87c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
581*87c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
582*87c2a1d7SRichard Tran Mills }
583*87c2a1d7SRichard Tran Mills 
584*87c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
585*87c2a1d7SRichard Tran Mills {
586*87c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
587*87c2a1d7SRichard Tran Mills 
588*87c2a1d7SRichard Tran Mills   PetscFunctionBegin;
589*87c2a1d7SRichard Tran Mills   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
590*87c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
591*87c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
592*87c2a1d7SRichard Tran Mills }
593*87c2a1d7SRichard Tran Mills 
594*87c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
595*87c2a1d7SRichard Tran Mills {
596*87c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
597*87c2a1d7SRichard Tran Mills 
598*87c2a1d7SRichard Tran Mills   PetscFunctionBegin;
599*87c2a1d7SRichard Tran Mills   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
600*87c2a1d7SRichard Tran Mills   if (str == SAME_NONZERO_PATTERN) {
601*87c2a1d7SRichard Tran Mills     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
602*87c2a1d7SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
603*87c2a1d7SRichard Tran Mills   }
604*87c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
605*87c2a1d7SRichard Tran Mills }
606*87c2a1d7SRichard Tran Mills 
6074a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
6084a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
6094a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
6104a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
6114a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
6124a2a386eSRichard Tran Mills {
6134a2a386eSRichard Tran Mills   PetscErrorCode ierr;
6144a2a386eSRichard Tran Mills   Mat            B = *newmat;
6154a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
616c9d46305SRichard Tran Mills   PetscBool      set;
617e9c94282SRichard Tran Mills   PetscBool      sametype;
6184a2a386eSRichard Tran Mills 
6194a2a386eSRichard Tran Mills   PetscFunctionBegin;
6204a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
6214a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
6224a2a386eSRichard Tran Mills   }
6234a2a386eSRichard Tran Mills 
624e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
625e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
626e9c94282SRichard Tran Mills 
6274a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
6284a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
6294a2a386eSRichard Tran Mills 
630df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
631969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
6324a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
6334a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
6344a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
635c9d46305SRichard Tran Mills 
6364abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
637d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
638d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
639d995685eSRichard Tran Mills #elif
640d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
641d995685eSRichard Tran Mills #endif
6424abfa3b3SRichard Tran Mills 
6434abfa3b3SRichard Tran Mills   /* Parse command line options. */
644c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
645c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
646c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
647d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
648d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
649d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
650d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
651d995685eSRichard Tran Mills   }
652d995685eSRichard Tran Mills #endif
653c9d46305SRichard Tran Mills 
654c9d46305SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
655d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
656df555b71SRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
657969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
658df555b71SRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
659969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
660d995685eSRichard Tran Mills #endif
661c9d46305SRichard Tran Mills   } else {
6624a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
663969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
6644a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
665969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
666c9d46305SRichard Tran Mills   }
6674a2a386eSRichard Tran Mills 
668db63039fSRichard Tran Mills   B->ops->scale              = MatScale_SeqAIJMKL;
669*87c2a1d7SRichard Tran Mills   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
670*87c2a1d7SRichard Tran Mills   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
671*87c2a1d7SRichard Tran Mills   B->ops->axpy               = MatAXPY_SeqAIJMKL;
672db63039fSRichard Tran Mills 
673db63039fSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
6744a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
675e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
676e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
677e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
6784a2a386eSRichard Tran Mills 
6794a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
6804a2a386eSRichard Tran Mills   *newmat = B;
6814a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6824a2a386eSRichard Tran Mills }
6834a2a386eSRichard Tran Mills 
6844a2a386eSRichard Tran Mills /*@C
6854a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
6864a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
6874a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
6884a2a386eSRichard Tran Mills    Collective on MPI_Comm
6894a2a386eSRichard Tran Mills 
6904a2a386eSRichard Tran Mills    Input Parameters:
6914a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
6924a2a386eSRichard Tran Mills .  m - number of rows
6934a2a386eSRichard Tran Mills .  n - number of columns
6944a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
6954a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
6964a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
6974a2a386eSRichard Tran Mills 
6984a2a386eSRichard Tran Mills    Output Parameter:
6994a2a386eSRichard Tran Mills .  A - the matrix
7004a2a386eSRichard Tran Mills 
7014a2a386eSRichard Tran Mills    Notes:
7024a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
7034a2a386eSRichard Tran Mills 
7044a2a386eSRichard Tran Mills    Level: intermediate
7054a2a386eSRichard Tran Mills 
7064a2a386eSRichard Tran Mills .keywords: matrix, cray, sparse, parallel
7074a2a386eSRichard Tran Mills 
7084a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
7094a2a386eSRichard Tran Mills @*/
7104a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
7114a2a386eSRichard Tran Mills {
7124a2a386eSRichard Tran Mills   PetscErrorCode ierr;
7134a2a386eSRichard Tran Mills 
7144a2a386eSRichard Tran Mills   PetscFunctionBegin;
7154a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
7164a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
7174a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
7184a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
7194a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7204a2a386eSRichard Tran Mills }
7214a2a386eSRichard Tran Mills 
7224a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
7234a2a386eSRichard Tran Mills {
7244a2a386eSRichard Tran Mills   PetscErrorCode ierr;
7254a2a386eSRichard Tran Mills 
7264a2a386eSRichard Tran Mills   PetscFunctionBegin;
7274a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
7284a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
7294a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
7304a2a386eSRichard Tran Mills }
731