xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 54871a98d0ef4c68e7311a77dff96962372cd4af)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
164a2a386eSRichard Tran Mills   /* "Handle" used by SpMV2 inspector-executor routines. */
174a2a386eSRichard Tran Mills   sparse_matrix_t csrA;
184a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
194a2a386eSRichard Tran Mills 
204a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
214a2a386eSRichard Tran Mills 
224a2a386eSRichard Tran Mills #undef __FUNCT__
234a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
244a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
254a2a386eSRichard Tran Mills {
264a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
274a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
284a2a386eSRichard Tran Mills   PetscErrorCode ierr;
294a2a386eSRichard Tran Mills   Mat            B       = *newmat;
304a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
314a2a386eSRichard Tran Mills 
324a2a386eSRichard Tran Mills   PetscFunctionBegin;
334a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
344a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
354a2a386eSRichard Tran Mills   }
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
38*54871a98SRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJ;
394a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
404a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJ;
41*54871a98SRichard Tran Mills   B->ops->mult        = MatMult_SeqAIJ;
42*54871a98SRichard Tran Mills   B->ops->multadd     = MatMultAdd_SeqAIJ;
434a2a386eSRichard Tran Mills 
444a2a386eSRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure.
454a2a386eSRichard Tran Mills    * We don't free the Mat_SeqAIJMKL struct itself, as this will
464a2a386eSRichard Tran Mills    * cause problems later when MatDestroy() tries to free it. */
474a2a386eSRichard Tran Mills   /* Actually there is nothing to do here right now.
484a2a386eSRichard Tran Mills    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
494a2a386eSRichard Tran Mills    * see if there is some way to clean up the "handle" used by SpMV2. */
504a2a386eSRichard Tran Mills 
514a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
524a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
534a2a386eSRichard Tran Mills 
544a2a386eSRichard Tran Mills   *newmat = B;
554a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
564a2a386eSRichard Tran Mills }
574a2a386eSRichard Tran Mills 
584a2a386eSRichard Tran Mills #undef __FUNCT__
594a2a386eSRichard Tran Mills #define __FUNCT__ "MatDestroy_SeqAIJMKL"
604a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
614a2a386eSRichard Tran Mills {
624a2a386eSRichard Tran Mills   PetscErrorCode ierr;
634a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
644a2a386eSRichard Tran Mills 
654a2a386eSRichard Tran Mills   PetscFunctionBegin;
664a2a386eSRichard Tran Mills   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
674a2a386eSRichard Tran Mills   mkl_sparse_destroy(aijmkl->csrA);
684a2a386eSRichard Tran Mills   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
694a2a386eSRichard Tran Mills 
704a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
714a2a386eSRichard Tran Mills    * to destroy everything that remains. */
724a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
734a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
744a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
754a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
764a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
774a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
784a2a386eSRichard Tran Mills }
794a2a386eSRichard Tran Mills 
804a2a386eSRichard Tran Mills #undef __FUNCT__
814a2a386eSRichard Tran Mills #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
824a2a386eSRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
834a2a386eSRichard Tran Mills {
844a2a386eSRichard Tran Mills   PetscErrorCode ierr;
854a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
864a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
874a2a386eSRichard Tran Mills 
884a2a386eSRichard Tran Mills   PetscFunctionBegin;
894a2a386eSRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
904a2a386eSRichard Tran Mills   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
914a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
924a2a386eSRichard Tran Mills }
934a2a386eSRichard Tran Mills 
944a2a386eSRichard Tran Mills #undef __FUNCT__
954a2a386eSRichard Tran Mills #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
964a2a386eSRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
974a2a386eSRichard Tran Mills {
984a2a386eSRichard Tran Mills   PetscErrorCode ierr;
994a2a386eSRichard Tran Mills   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1004a2a386eSRichard Tran Mills 
1014a2a386eSRichard Tran Mills   PetscFunctionBegin;
1024a2a386eSRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1034a2a386eSRichard Tran Mills 
1044a2a386eSRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
1054a2a386eSRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
1064a2a386eSRichard Tran Mills    * routine for a MATSEQAIJ.
1074a2a386eSRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
1084a2a386eSRichard Tran Mills    * a lot of code duplication.
1094a2a386eSRichard Tran Mills    * I also note that currently MATSEQAIJMKL doesn't know anything about
1104a2a386eSRichard Tran Mills    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
1114a2a386eSRichard Tran Mills    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
1124a2a386eSRichard Tran Mills    * this, this may break things.  (Don't know... haven't looked at it.
1134a2a386eSRichard Tran Mills    * Do I need to disable this somehow?) */
1144a2a386eSRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
1154a2a386eSRichard Tran Mills   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
1164a2a386eSRichard Tran Mills 
1174a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1184a2a386eSRichard Tran Mills }
1194a2a386eSRichard Tran Mills 
1204a2a386eSRichard Tran Mills #undef __FUNCT__
1214a2a386eSRichard Tran Mills #define __FUNCT__ "MatMult_SeqAIJMKL"
1224a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
1234a2a386eSRichard Tran Mills {
1244a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1254a2a386eSRichard Tran Mills   const PetscScalar *x;
1264a2a386eSRichard Tran Mills   PetscScalar       *y;
1274a2a386eSRichard Tran Mills   const MatScalar   *aa;
1284a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
1294a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
1304a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
1314a2a386eSRichard Tran Mills   PetscInt          i;
1324a2a386eSRichard Tran Mills 
1334a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
1344a2a386eSRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are computing the transpose product. */
1354a2a386eSRichard Tran Mills 
1364a2a386eSRichard Tran Mills   PetscFunctionBegin;
1374a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1384a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1394a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
1404a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
1414a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1424a2a386eSRichard Tran Mills 
1434a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
1444a2a386eSRichard Tran Mills   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
1454a2a386eSRichard Tran Mills 
1464a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
1474a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1484a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1494a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1504a2a386eSRichard Tran Mills }
1514a2a386eSRichard Tran Mills 
1524a2a386eSRichard Tran Mills #undef __FUNCT__
1534a2a386eSRichard Tran Mills #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
1544a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
1554a2a386eSRichard Tran Mills {
1564a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1574a2a386eSRichard Tran Mills   const PetscScalar *x;
1584a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
1594a2a386eSRichard Tran Mills   const MatScalar   *aa;
1604a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
1614a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
1624a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
1634a2a386eSRichard Tran Mills   PetscInt          i;
1644a2a386eSRichard Tran Mills 
1654a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
1664a2a386eSRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are computing the transpose product. */
1674a2a386eSRichard Tran Mills 
1684a2a386eSRichard Tran Mills   PetscFunctionBegin;
1694a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1704a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
1714a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
1724a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
1734a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
1744a2a386eSRichard Tran Mills 
1754a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
1764a2a386eSRichard Tran Mills   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
1774a2a386eSRichard Tran Mills 
1784a2a386eSRichard Tran Mills   /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */
1794a2a386eSRichard Tran Mills   for (i=0; i<m; i++) {
1804a2a386eSRichard Tran Mills     z[i] += y[i];
1814a2a386eSRichard Tran Mills   }
1824a2a386eSRichard Tran Mills 
1834a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1844a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1854a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
1864a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1874a2a386eSRichard Tran Mills }
1884a2a386eSRichard Tran Mills 
1894a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1904a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
1914a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
1924a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
1934a2a386eSRichard Tran Mills #undef __FUNCT__
1944a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
1954a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1964a2a386eSRichard Tran Mills {
1974a2a386eSRichard Tran Mills   PetscErrorCode ierr;
1984a2a386eSRichard Tran Mills   Mat            B = *newmat;
1994a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2004a2a386eSRichard Tran Mills 
2014a2a386eSRichard Tran Mills   PetscFunctionBegin;
2024a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
2034a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
2044a2a386eSRichard Tran Mills   }
2054a2a386eSRichard Tran Mills 
2064a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
2074a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
2084a2a386eSRichard Tran Mills 
2094a2a386eSRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override. */
2104a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
2114a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
2124a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
2134a2a386eSRichard Tran Mills   B->ops->mult        = MatMult_SeqAIJMKL;
2144a2a386eSRichard Tran Mills   B->ops->multadd     = MatMultAdd_SeqAIJMKL;
2154a2a386eSRichard Tran Mills 
2164a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
2174a2a386eSRichard Tran Mills 
2184a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
2194a2a386eSRichard Tran Mills   *newmat = B;
2204a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2214a2a386eSRichard Tran Mills }
2224a2a386eSRichard Tran Mills 
2234a2a386eSRichard Tran Mills #undef __FUNCT__
2244a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreateSeqAIJMKL"
2254a2a386eSRichard Tran Mills /*@C
2264a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
2274a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
2284a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
2294a2a386eSRichard Tran Mills    Collective on MPI_Comm
2304a2a386eSRichard Tran Mills 
2314a2a386eSRichard Tran Mills    Input Parameters:
2324a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
2334a2a386eSRichard Tran Mills .  m - number of rows
2344a2a386eSRichard Tran Mills .  n - number of columns
2354a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
2364a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
2374a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
2384a2a386eSRichard Tran Mills 
2394a2a386eSRichard Tran Mills    Output Parameter:
2404a2a386eSRichard Tran Mills .  A - the matrix
2414a2a386eSRichard Tran Mills 
2424a2a386eSRichard Tran Mills    Notes:
2434a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
2444a2a386eSRichard Tran Mills 
2454a2a386eSRichard Tran Mills    Level: intermediate
2464a2a386eSRichard Tran Mills 
2474a2a386eSRichard Tran Mills .keywords: matrix, cray, sparse, parallel
2484a2a386eSRichard Tran Mills 
2494a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
2504a2a386eSRichard Tran Mills @*/
2514a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2524a2a386eSRichard Tran Mills {
2534a2a386eSRichard Tran Mills   PetscErrorCode ierr;
2544a2a386eSRichard Tran Mills 
2554a2a386eSRichard Tran Mills   PetscFunctionBegin;
2564a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2574a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2584a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
2594a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
2604a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2614a2a386eSRichard Tran Mills }
2624a2a386eSRichard Tran Mills 
2634a2a386eSRichard Tran Mills #undef __FUNCT__
2644a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreate_SeqAIJMKL"
2654a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
2664a2a386eSRichard Tran Mills {
2674a2a386eSRichard Tran Mills   PetscErrorCode ierr;
2684a2a386eSRichard Tran Mills 
2694a2a386eSRichard Tran Mills   PetscFunctionBegin;
2704a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
2714a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2724a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2734a2a386eSRichard Tran Mills }
2744a2a386eSRichard Tran Mills 
2754a2a386eSRichard Tran Mills 
276