14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 164a2a386eSRichard Tran Mills /* "Handle" used by SpMV2 inspector-executor routines. */ 174a2a386eSRichard Tran Mills sparse_matrix_t csrA; 184a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 194a2a386eSRichard Tran Mills 204a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 214a2a386eSRichard Tran Mills 224a2a386eSRichard Tran Mills #undef __FUNCT__ 234a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 244a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 254a2a386eSRichard Tran Mills { 264a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 274a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 284a2a386eSRichard Tran Mills PetscErrorCode ierr; 294a2a386eSRichard Tran Mills Mat B = *newmat; 304a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 314a2a386eSRichard Tran Mills 324a2a386eSRichard Tran Mills PetscFunctionBegin; 334a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 344a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 354a2a386eSRichard Tran Mills } 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 38*54871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 394a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 404a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 41*54871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 42*54871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 434a2a386eSRichard Tran Mills 444a2a386eSRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. 454a2a386eSRichard Tran Mills * We don't free the Mat_SeqAIJMKL struct itself, as this will 464a2a386eSRichard Tran Mills * cause problems later when MatDestroy() tries to free it. */ 474a2a386eSRichard Tran Mills /* Actually there is nothing to do here right now. 484a2a386eSRichard Tran Mills * When I've added use of the MKL SpMV2 inspector-executor routines, I should 494a2a386eSRichard Tran Mills * see if there is some way to clean up the "handle" used by SpMV2. */ 504a2a386eSRichard Tran Mills 514a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 524a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 534a2a386eSRichard Tran Mills 544a2a386eSRichard Tran Mills *newmat = B; 554a2a386eSRichard Tran Mills PetscFunctionReturn(0); 564a2a386eSRichard Tran Mills } 574a2a386eSRichard Tran Mills 584a2a386eSRichard Tran Mills #undef __FUNCT__ 594a2a386eSRichard Tran Mills #define __FUNCT__ "MatDestroy_SeqAIJMKL" 604a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 614a2a386eSRichard Tran Mills { 624a2a386eSRichard Tran Mills PetscErrorCode ierr; 634a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 644a2a386eSRichard Tran Mills 654a2a386eSRichard Tran Mills PetscFunctionBegin; 664a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 674a2a386eSRichard Tran Mills mkl_sparse_destroy(aijmkl->csrA); 684a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 694a2a386eSRichard Tran Mills 704a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 714a2a386eSRichard Tran Mills * to destroy everything that remains. */ 724a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 734a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 744a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 754a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 764a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 774a2a386eSRichard Tran Mills PetscFunctionReturn(0); 784a2a386eSRichard Tran Mills } 794a2a386eSRichard Tran Mills 804a2a386eSRichard Tran Mills #undef __FUNCT__ 814a2a386eSRichard Tran Mills #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 824a2a386eSRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 834a2a386eSRichard Tran Mills { 844a2a386eSRichard Tran Mills PetscErrorCode ierr; 854a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 864a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 874a2a386eSRichard Tran Mills 884a2a386eSRichard Tran Mills PetscFunctionBegin; 894a2a386eSRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 904a2a386eSRichard Tran Mills ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 914a2a386eSRichard Tran Mills PetscFunctionReturn(0); 924a2a386eSRichard Tran Mills } 934a2a386eSRichard Tran Mills 944a2a386eSRichard Tran Mills #undef __FUNCT__ 954a2a386eSRichard Tran Mills #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 964a2a386eSRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 974a2a386eSRichard Tran Mills { 984a2a386eSRichard Tran Mills PetscErrorCode ierr; 994a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1004a2a386eSRichard Tran Mills 1014a2a386eSRichard Tran Mills PetscFunctionBegin; 1024a2a386eSRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1034a2a386eSRichard Tran Mills 1044a2a386eSRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 1054a2a386eSRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 1064a2a386eSRichard Tran Mills * routine for a MATSEQAIJ. 1074a2a386eSRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 1084a2a386eSRichard Tran Mills * a lot of code duplication. 1094a2a386eSRichard Tran Mills * I also note that currently MATSEQAIJMKL doesn't know anything about 1104a2a386eSRichard Tran Mills * the Mat_CompressedRow data structure that SeqAIJ now uses when there 1114a2a386eSRichard Tran Mills * are many zero rows. If the SeqAIJ assembly end routine decides to use 1124a2a386eSRichard Tran Mills * this, this may break things. (Don't know... haven't looked at it. 1134a2a386eSRichard Tran Mills * Do I need to disable this somehow?) */ 1144a2a386eSRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 1154a2a386eSRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 1164a2a386eSRichard Tran Mills 1174a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1184a2a386eSRichard Tran Mills } 1194a2a386eSRichard Tran Mills 1204a2a386eSRichard Tran Mills #undef __FUNCT__ 1214a2a386eSRichard Tran Mills #define __FUNCT__ "MatMult_SeqAIJMKL" 1224a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 1234a2a386eSRichard Tran Mills { 1244a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1254a2a386eSRichard Tran Mills const PetscScalar *x; 1264a2a386eSRichard Tran Mills PetscScalar *y; 1274a2a386eSRichard Tran Mills const MatScalar *aa; 1284a2a386eSRichard Tran Mills PetscErrorCode ierr; 1294a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 1304a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 1314a2a386eSRichard Tran Mills PetscInt i; 1324a2a386eSRichard Tran Mills 1334a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 1344a2a386eSRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 1354a2a386eSRichard Tran Mills 1364a2a386eSRichard Tran Mills PetscFunctionBegin; 1374a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1384a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1394a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 1404a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 1414a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1424a2a386eSRichard Tran Mills 1434a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 1444a2a386eSRichard Tran Mills mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 1454a2a386eSRichard Tran Mills 1464a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 1474a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1484a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1494a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1504a2a386eSRichard Tran Mills } 1514a2a386eSRichard Tran Mills 1524a2a386eSRichard Tran Mills #undef __FUNCT__ 1534a2a386eSRichard Tran Mills #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 1544a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 1554a2a386eSRichard Tran Mills { 1564a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1574a2a386eSRichard Tran Mills const PetscScalar *x; 1584a2a386eSRichard Tran Mills PetscScalar *y,*z; 1594a2a386eSRichard Tran Mills const MatScalar *aa; 1604a2a386eSRichard Tran Mills PetscErrorCode ierr; 1614a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 1624a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 1634a2a386eSRichard Tran Mills PetscInt i; 1644a2a386eSRichard Tran Mills 1654a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 1664a2a386eSRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 1674a2a386eSRichard Tran Mills 1684a2a386eSRichard Tran Mills PetscFunctionBegin; 1694a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1704a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1714a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 1724a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 1734a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1744a2a386eSRichard Tran Mills 1754a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 1764a2a386eSRichard Tran Mills mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 1774a2a386eSRichard Tran Mills 1784a2a386eSRichard Tran Mills /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 1794a2a386eSRichard Tran Mills for (i=0; i<m; i++) { 1804a2a386eSRichard Tran Mills z[i] += y[i]; 1814a2a386eSRichard Tran Mills } 1824a2a386eSRichard Tran Mills 1834a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1844a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1854a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1864a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1874a2a386eSRichard Tran Mills } 1884a2a386eSRichard Tran Mills 1894a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1904a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 1914a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 1924a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 1934a2a386eSRichard Tran Mills #undef __FUNCT__ 1944a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 1954a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1964a2a386eSRichard Tran Mills { 1974a2a386eSRichard Tran Mills PetscErrorCode ierr; 1984a2a386eSRichard Tran Mills Mat B = *newmat; 1994a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2004a2a386eSRichard Tran Mills 2014a2a386eSRichard Tran Mills PetscFunctionBegin; 2024a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 2034a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2044a2a386eSRichard Tran Mills } 2054a2a386eSRichard Tran Mills 2064a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 2074a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 2084a2a386eSRichard Tran Mills 2094a2a386eSRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. */ 2104a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 2114a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 2124a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 2134a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 2144a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 2154a2a386eSRichard Tran Mills 2164a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 2174a2a386eSRichard Tran Mills 2184a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 2194a2a386eSRichard Tran Mills *newmat = B; 2204a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2214a2a386eSRichard Tran Mills } 2224a2a386eSRichard Tran Mills 2234a2a386eSRichard Tran Mills #undef __FUNCT__ 2244a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreateSeqAIJMKL" 2254a2a386eSRichard Tran Mills /*@C 2264a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 2274a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 2284a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 2294a2a386eSRichard Tran Mills Collective on MPI_Comm 2304a2a386eSRichard Tran Mills 2314a2a386eSRichard Tran Mills Input Parameters: 2324a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 2334a2a386eSRichard Tran Mills . m - number of rows 2344a2a386eSRichard Tran Mills . n - number of columns 2354a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 2364a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 2374a2a386eSRichard Tran Mills (possibly different for each row) or NULL 2384a2a386eSRichard Tran Mills 2394a2a386eSRichard Tran Mills Output Parameter: 2404a2a386eSRichard Tran Mills . A - the matrix 2414a2a386eSRichard Tran Mills 2424a2a386eSRichard Tran Mills Notes: 2434a2a386eSRichard Tran Mills If nnz is given then nz is ignored 2444a2a386eSRichard Tran Mills 2454a2a386eSRichard Tran Mills Level: intermediate 2464a2a386eSRichard Tran Mills 2474a2a386eSRichard Tran Mills .keywords: matrix, cray, sparse, parallel 2484a2a386eSRichard Tran Mills 2494a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 2504a2a386eSRichard Tran Mills @*/ 2514a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2524a2a386eSRichard Tran Mills { 2534a2a386eSRichard Tran Mills PetscErrorCode ierr; 2544a2a386eSRichard Tran Mills 2554a2a386eSRichard Tran Mills PetscFunctionBegin; 2564a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 2574a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2584a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 2594a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 2604a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2614a2a386eSRichard Tran Mills } 2624a2a386eSRichard Tran Mills 2634a2a386eSRichard Tran Mills #undef __FUNCT__ 2644a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreate_SeqAIJMKL" 2654a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 2664a2a386eSRichard Tran Mills { 2674a2a386eSRichard Tran Mills PetscErrorCode ierr; 2684a2a386eSRichard Tran Mills 2694a2a386eSRichard Tran Mills PetscFunctionBegin; 2704a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 2714a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2724a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2734a2a386eSRichard Tran Mills } 2744a2a386eSRichard Tran Mills 2754a2a386eSRichard Tran Mills 276