14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20df555b71SRichard Tran Mills struct matrix_descr descr; 21b8cbc1fbSRichard Tran Mills #endif 224a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 234a2a386eSRichard Tran Mills 244a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 254a2a386eSRichard Tran Mills 264a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 274a2a386eSRichard Tran Mills { 284a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 294a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 304a2a386eSRichard Tran Mills PetscErrorCode ierr; 314a2a386eSRichard Tran Mills Mat B = *newmat; 324a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 334a2a386eSRichard Tran Mills 344a2a386eSRichard Tran Mills PetscFunctionBegin; 354a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 364a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37e9c94282SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)B->spptr; 384a2a386eSRichard Tran Mills } 394a2a386eSRichard Tran Mills 404a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4154871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 424a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 434a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4454871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 45ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 4887c2a1d7SRichard Tran Mills B->ops->scale = MatScale_SeqAIJ; 4987c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 5087c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 5187c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJ; 524a2a386eSRichard Tran Mills 53e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57e9c94282SRichard Tran Mills 584abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 59e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 60e9c94282SRichard Tran Mills * the spptr pointer. */ 614abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 624abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 630632b357SRichard Tran Mills sparse_status_t stat; 644abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 654abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 664abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 674abfa3b3SRichard Tran Mills } 684abfa3b3SRichard Tran Mills } 694abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 70e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 714a2a386eSRichard Tran Mills 724a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 734a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 744a2a386eSRichard Tran Mills 754a2a386eSRichard Tran Mills *newmat = B; 764a2a386eSRichard Tran Mills PetscFunctionReturn(0); 774a2a386eSRichard Tran Mills } 784a2a386eSRichard Tran Mills 794a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 804a2a386eSRichard Tran Mills { 814a2a386eSRichard Tran Mills PetscErrorCode ierr; 824a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 834a2a386eSRichard Tran Mills 844a2a386eSRichard Tran Mills PetscFunctionBegin; 85e9c94282SRichard Tran Mills 86e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87e9c94282SRichard Tran Mills * spptr pointer. */ 88e9c94282SRichard Tran Mills if (aijmkl) { 894a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 904abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 914abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 924abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 934abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 944abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 954abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 964abfa3b3SRichard Tran Mills } 974abfa3b3SRichard Tran Mills } 984abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 994a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100e9c94282SRichard Tran Mills } 1014a2a386eSRichard Tran Mills 1024a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1034a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1044a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1054a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1064a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1074a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1084a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1094a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1104a2a386eSRichard Tran Mills } 1114a2a386eSRichard Tran Mills 1126e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1134a2a386eSRichard Tran Mills { 1144a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 11658678438SRichard Tran Mills PetscInt m,n; 1176e369cd5SRichard Tran Mills MatScalar *aa; 118df555b71SRichard Tran Mills PetscInt *aj,*ai; 1194a2a386eSRichard Tran Mills 1206e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1216e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1226e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1236e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 1246e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1256e369cd5SRichard Tran Mills #else 1264a2a386eSRichard Tran Mills 1276e369cd5SRichard Tran Mills sparse_status_t stat; 1284a2a386eSRichard Tran Mills 129df555b71SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 1306e369cd5SRichard Tran Mills 1316e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1326e369cd5SRichard Tran Mills 1330632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1340632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1350632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1360632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1370632b357SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1380632b357SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1390632b357SRichard Tran Mills } 1400632b357SRichard Tran Mills } 1418d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1426e369cd5SRichard Tran Mills 143c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 144df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14758678438SRichard Tran Mills m = A->rmap->n; 14858678438SRichard Tran Mills n = A->cmap->n; 149df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 151df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1528d3fe1b0SRichard Tran Mills if (a->nz) { 1538d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1548d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 15558678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 159df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 160f68ad4bdSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 162df555b71SRichard Tran Mills } 1634abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 164c9d46305SRichard Tran Mills } 1656e369cd5SRichard Tran Mills 1666e369cd5SRichard Tran Mills PetscFunctionReturn(0); 167d995685eSRichard Tran Mills #endif 1686e369cd5SRichard Tran Mills } 1696e369cd5SRichard Tran Mills 1706e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 1716e369cd5SRichard Tran Mills { 1726e369cd5SRichard Tran Mills PetscErrorCode ierr; 1736e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1746e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 1756e369cd5SRichard Tran Mills 1766e369cd5SRichard Tran Mills PetscFunctionBegin; 1776e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 1786e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 1796e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 1806e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 1816e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 1826e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 1836e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1846e369cd5SRichard Tran Mills } 1856e369cd5SRichard Tran Mills 1866e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 1876e369cd5SRichard Tran Mills { 1886e369cd5SRichard Tran Mills PetscErrorCode ierr; 1896e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1906e369cd5SRichard Tran Mills 1916e369cd5SRichard Tran Mills PetscFunctionBegin; 1926e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1936e369cd5SRichard Tran Mills 1946e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 1956e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 1966e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 1976e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 198d96e85feSRichard Tran Mills * a lot of code duplication. */ 1996e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 2006e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 2016e369cd5SRichard Tran Mills 2026e369cd5SRichard Tran Mills /* Now create the MKL sparse handle (if needed; the function checks). */ 2036e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 204df555b71SRichard Tran Mills 2054a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2064a2a386eSRichard Tran Mills } 2074a2a386eSRichard Tran Mills 2084a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 2094a2a386eSRichard Tran Mills { 2104a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2114a2a386eSRichard Tran Mills const PetscScalar *x; 2124a2a386eSRichard Tran Mills PetscScalar *y; 2134a2a386eSRichard Tran Mills const MatScalar *aa; 2144a2a386eSRichard Tran Mills PetscErrorCode ierr; 2154a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 216db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 217db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 218db63039fSRichard Tran Mills PetscScalar beta = 0.0; 2194a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 220db63039fSRichard Tran Mills char matdescra[6]; 221db63039fSRichard Tran Mills 2224a2a386eSRichard Tran Mills 2234a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 224ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 225ff03dc53SRichard Tran Mills 226ff03dc53SRichard Tran Mills PetscFunctionBegin; 227db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 228db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 229ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 230ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 232ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 233ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 234ff03dc53SRichard Tran Mills 235ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 236db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 237ff03dc53SRichard Tran Mills 238ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 239ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 240ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 241ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 242ff03dc53SRichard Tran Mills } 243ff03dc53SRichard Tran Mills 244d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 245df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 246df555b71SRichard Tran Mills { 247df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 249df555b71SRichard Tran Mills const PetscScalar *x; 250df555b71SRichard Tran Mills PetscScalar *y; 251df555b71SRichard Tran Mills PetscErrorCode ierr; 252df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 253df555b71SRichard Tran Mills 254df555b71SRichard Tran Mills PetscFunctionBegin; 255df555b71SRichard Tran Mills 256*38987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 257*38987b35SRichard Tran Mills if(!a->nz) { 258*38987b35SRichard Tran Mills PetscInt i; 259*38987b35SRichard Tran Mills PetscInt m=A->rmap->n; 260*38987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 261*38987b35SRichard Tran Mills for (i=0; i<m; i++) { 262*38987b35SRichard Tran Mills y[i] = 0.0; 263*38987b35SRichard Tran Mills } 264*38987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 265*38987b35SRichard Tran Mills PetscFunctionReturn(0); 266*38987b35SRichard Tran Mills } 267f36dfe3fSRichard Tran Mills 268df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 269df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 270df555b71SRichard Tran Mills 2713fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2723fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2733fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 2743fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 2753fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 2763fa15762SRichard Tran Mills } 2773fa15762SRichard Tran Mills 278df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 279df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 280df555b71SRichard Tran Mills 281df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 282df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 283df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 284df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 285df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 286df555b71SRichard Tran Mills } 287df555b71SRichard Tran Mills PetscFunctionReturn(0); 288df555b71SRichard Tran Mills } 289d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 290df555b71SRichard Tran Mills 291ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 292ff03dc53SRichard Tran Mills { 293ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 294ff03dc53SRichard Tran Mills const PetscScalar *x; 295ff03dc53SRichard Tran Mills PetscScalar *y; 296ff03dc53SRichard Tran Mills const MatScalar *aa; 297ff03dc53SRichard Tran Mills PetscErrorCode ierr; 298ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 299db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 300db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 301db63039fSRichard Tran Mills PetscScalar beta = 0.0; 302ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 303db63039fSRichard Tran Mills char matdescra[6]; 304ff03dc53SRichard Tran Mills 305ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 306ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 3074a2a386eSRichard Tran Mills 3084a2a386eSRichard Tran Mills PetscFunctionBegin; 309969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 310969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3114a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3124a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 3134a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 3144a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 3154a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 3164a2a386eSRichard Tran Mills 3174a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 318db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 3194a2a386eSRichard Tran Mills 3204a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 3214a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3224a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 3234a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3244a2a386eSRichard Tran Mills } 3254a2a386eSRichard Tran Mills 326d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 327df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 328df555b71SRichard Tran Mills { 329df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 330df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 331df555b71SRichard Tran Mills const PetscScalar *x; 332df555b71SRichard Tran Mills PetscScalar *y; 333df555b71SRichard Tran Mills PetscErrorCode ierr; 3340632b357SRichard Tran Mills sparse_status_t stat; 335df555b71SRichard Tran Mills 336df555b71SRichard Tran Mills PetscFunctionBegin; 337df555b71SRichard Tran Mills 338*38987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 339*38987b35SRichard Tran Mills if(!a->nz) { 340*38987b35SRichard Tran Mills PetscInt i; 341*38987b35SRichard Tran Mills PetscInt n=A->cmap->n; 342*38987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 343*38987b35SRichard Tran Mills for (i=0; i<n; i++) { 344*38987b35SRichard Tran Mills y[i] = 0.0; 345*38987b35SRichard Tran Mills } 346*38987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 347*38987b35SRichard Tran Mills PetscFunctionReturn(0); 348*38987b35SRichard Tran Mills } 349f36dfe3fSRichard Tran Mills 350df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 351df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 352df555b71SRichard Tran Mills 3533fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3543fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3553fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3563fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 3573fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3583fa15762SRichard Tran Mills } 3593fa15762SRichard Tran Mills 360df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 361df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 362df555b71SRichard Tran Mills 363df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 364df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 365df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 366df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 367df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 368df555b71SRichard Tran Mills } 369df555b71SRichard Tran Mills PetscFunctionReturn(0); 370df555b71SRichard Tran Mills } 371d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 372df555b71SRichard Tran Mills 3734a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 3744a2a386eSRichard Tran Mills { 3754a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3764a2a386eSRichard Tran Mills const PetscScalar *x; 3774a2a386eSRichard Tran Mills PetscScalar *y,*z; 3784a2a386eSRichard Tran Mills const MatScalar *aa; 3794a2a386eSRichard Tran Mills PetscErrorCode ierr; 3804a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 381db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 3824a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 3834a2a386eSRichard Tran Mills PetscInt i; 3844a2a386eSRichard Tran Mills 385ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 386ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 387a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 388db63039fSRichard Tran Mills PetscScalar beta; 389a84739b8SRichard Tran Mills char matdescra[6]; 390ff03dc53SRichard Tran Mills 391ff03dc53SRichard Tran Mills PetscFunctionBegin; 392a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 393a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 394a84739b8SRichard Tran Mills 395ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 397ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 398ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 399ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 400ff03dc53SRichard Tran Mills 401ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 402a84739b8SRichard Tran Mills if (zz == yy) { 403a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 404db63039fSRichard Tran Mills beta = 1.0; 405db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 406a84739b8SRichard Tran Mills } else { 407db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 408db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 409db63039fSRichard Tran Mills beta = 0.0; 410db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 411ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 412ff03dc53SRichard Tran Mills z[i] += y[i]; 413ff03dc53SRichard Tran Mills } 414a84739b8SRichard Tran Mills } 415ff03dc53SRichard Tran Mills 416ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 417ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 418ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 419ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 420ff03dc53SRichard Tran Mills } 421ff03dc53SRichard Tran Mills 422d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 423df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 424df555b71SRichard Tran Mills { 425df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 426df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 427df555b71SRichard Tran Mills const PetscScalar *x; 428df555b71SRichard Tran Mills PetscScalar *y,*z; 429df555b71SRichard Tran Mills PetscErrorCode ierr; 430df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 431df555b71SRichard Tran Mills PetscInt i; 432df555b71SRichard Tran Mills 433df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 434df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 435df555b71SRichard Tran Mills 436df555b71SRichard Tran Mills PetscFunctionBegin; 437df555b71SRichard Tran Mills 438*38987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 439*38987b35SRichard Tran Mills if(!a->nz) { 440*38987b35SRichard Tran Mills PetscInt i; 441*38987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 442*38987b35SRichard Tran Mills for (i=0; i<m; i++) { 443*38987b35SRichard Tran Mills z[i] = y[i]; 444*38987b35SRichard Tran Mills } 445*38987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 446*38987b35SRichard Tran Mills PetscFunctionReturn(0); 447*38987b35SRichard Tran Mills } 448df555b71SRichard Tran Mills 449df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 451df555b71SRichard Tran Mills 4523fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4533fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4543fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4553fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 4563fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4573fa15762SRichard Tran Mills } 4583fa15762SRichard Tran Mills 459df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 460df555b71SRichard Tran Mills if (zz == yy) { 461df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 462df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 463db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 464df555b71SRichard Tran Mills } else { 465df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 466df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 467db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 468df555b71SRichard Tran Mills for (i=0; i<m; i++) { 469df555b71SRichard Tran Mills z[i] += y[i]; 470df555b71SRichard Tran Mills } 471df555b71SRichard Tran Mills } 472df555b71SRichard Tran Mills 473df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 474df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 475df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 476df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 477df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 478df555b71SRichard Tran Mills } 479df555b71SRichard Tran Mills PetscFunctionReturn(0); 480df555b71SRichard Tran Mills } 481d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 482df555b71SRichard Tran Mills 483ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 484ff03dc53SRichard Tran Mills { 485ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 486ff03dc53SRichard Tran Mills const PetscScalar *x; 487ff03dc53SRichard Tran Mills PetscScalar *y,*z; 488ff03dc53SRichard Tran Mills const MatScalar *aa; 489ff03dc53SRichard Tran Mills PetscErrorCode ierr; 490ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 491db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 492ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 493ff03dc53SRichard Tran Mills PetscInt i; 494ff03dc53SRichard Tran Mills 495ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 496ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 497a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 498db63039fSRichard Tran Mills PetscScalar beta; 499a84739b8SRichard Tran Mills char matdescra[6]; 5004a2a386eSRichard Tran Mills 5014a2a386eSRichard Tran Mills PetscFunctionBegin; 502a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 503a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 504a84739b8SRichard Tran Mills 5054a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5064a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 5074a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 5084a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 5094a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 5104a2a386eSRichard Tran Mills 5114a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 512a84739b8SRichard Tran Mills if (zz == yy) { 513a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 514db63039fSRichard Tran Mills beta = 1.0; 515969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 516a84739b8SRichard Tran Mills } else { 517db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 518db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 519db63039fSRichard Tran Mills beta = 0.0; 520db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 521969800c5SRichard Tran Mills for (i=0; i<n; i++) { 5224a2a386eSRichard Tran Mills z[i] += y[i]; 5234a2a386eSRichard Tran Mills } 524a84739b8SRichard Tran Mills } 5254a2a386eSRichard Tran Mills 5264a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 5274a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5284a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 5294a2a386eSRichard Tran Mills PetscFunctionReturn(0); 5304a2a386eSRichard Tran Mills } 5314a2a386eSRichard Tran Mills 532d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 533df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 534df555b71SRichard Tran Mills { 535df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 536df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 537df555b71SRichard Tran Mills const PetscScalar *x; 538df555b71SRichard Tran Mills PetscScalar *y,*z; 539df555b71SRichard Tran Mills PetscErrorCode ierr; 540969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 541df555b71SRichard Tran Mills PetscInt i; 542df555b71SRichard Tran Mills 543df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 544df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 545df555b71SRichard Tran Mills 546df555b71SRichard Tran Mills PetscFunctionBegin; 547df555b71SRichard Tran Mills 548*38987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 549*38987b35SRichard Tran Mills if(!a->nz) { 550*38987b35SRichard Tran Mills PetscInt i; 551*38987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 552*38987b35SRichard Tran Mills for (i=0; i<n; i++) { 553*38987b35SRichard Tran Mills z[i] = y[i]; 554*38987b35SRichard Tran Mills } 555*38987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 556*38987b35SRichard Tran Mills PetscFunctionReturn(0); 557*38987b35SRichard Tran Mills } 558f36dfe3fSRichard Tran Mills 559df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 560df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 561df555b71SRichard Tran Mills 5623fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5633fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5643fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5653fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 5663fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5673fa15762SRichard Tran Mills } 5683fa15762SRichard Tran Mills 569df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 570df555b71SRichard Tran Mills if (zz == yy) { 571df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 572df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 573db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 574df555b71SRichard Tran Mills } else { 575df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 576df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 577db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 578969800c5SRichard Tran Mills for (i=0; i<n; i++) { 579df555b71SRichard Tran Mills z[i] += y[i]; 580df555b71SRichard Tran Mills } 581df555b71SRichard Tran Mills } 582df555b71SRichard Tran Mills 583df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 584df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 585df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 586df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 587df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 588df555b71SRichard Tran Mills } 589df555b71SRichard Tran Mills PetscFunctionReturn(0); 590df555b71SRichard Tran Mills } 591d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 592df555b71SRichard Tran Mills 59387c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 594db63039fSRichard Tran Mills { 595db63039fSRichard Tran Mills PetscErrorCode ierr; 596db63039fSRichard Tran Mills 59787c2a1d7SRichard Tran Mills PetscFunctionBegin; 598db63039fSRichard Tran Mills ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 599db63039fSRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 600db63039fSRichard Tran Mills PetscFunctionReturn(0); 601db63039fSRichard Tran Mills } 602df555b71SRichard Tran Mills 60387c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 60487c2a1d7SRichard Tran Mills { 60587c2a1d7SRichard Tran Mills PetscErrorCode ierr; 60687c2a1d7SRichard Tran Mills 60787c2a1d7SRichard Tran Mills PetscFunctionBegin; 60887c2a1d7SRichard Tran Mills ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 60987c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 61087c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 61187c2a1d7SRichard Tran Mills } 61287c2a1d7SRichard Tran Mills 61387c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 61487c2a1d7SRichard Tran Mills { 61587c2a1d7SRichard Tran Mills PetscErrorCode ierr; 61687c2a1d7SRichard Tran Mills 61787c2a1d7SRichard Tran Mills PetscFunctionBegin; 61887c2a1d7SRichard Tran Mills ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 61987c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 62087c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 62187c2a1d7SRichard Tran Mills } 62287c2a1d7SRichard Tran Mills 62387c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 62487c2a1d7SRichard Tran Mills { 62587c2a1d7SRichard Tran Mills PetscErrorCode ierr; 62687c2a1d7SRichard Tran Mills 62787c2a1d7SRichard Tran Mills PetscFunctionBegin; 62887c2a1d7SRichard Tran Mills ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 62987c2a1d7SRichard Tran Mills if (str == SAME_NONZERO_PATTERN) { 63087c2a1d7SRichard Tran Mills /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 63187c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 63287c2a1d7SRichard Tran Mills } 63387c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 63487c2a1d7SRichard Tran Mills } 63587c2a1d7SRichard Tran Mills 6364a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 6374a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 6384a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 6394a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 6404a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 6414a2a386eSRichard Tran Mills { 6424a2a386eSRichard Tran Mills PetscErrorCode ierr; 6434a2a386eSRichard Tran Mills Mat B = *newmat; 6444a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 645c9d46305SRichard Tran Mills PetscBool set; 646e9c94282SRichard Tran Mills PetscBool sametype; 6474a2a386eSRichard Tran Mills 6484a2a386eSRichard Tran Mills PetscFunctionBegin; 6494a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 6504a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 6514a2a386eSRichard Tran Mills } 6524a2a386eSRichard Tran Mills 653e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 654e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 655e9c94282SRichard Tran Mills 6564a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 6574a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 6584a2a386eSRichard Tran Mills 659df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 660969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 6614a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 6624a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 6634a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 664c9d46305SRichard Tran Mills 6654abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 666d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 667d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 668d995685eSRichard Tran Mills #elif 669d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 670d995685eSRichard Tran Mills #endif 6714abfa3b3SRichard Tran Mills 6724abfa3b3SRichard Tran Mills /* Parse command line options. */ 673c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 674c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 675c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 676d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 677d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 678d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 679d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 680d995685eSRichard Tran Mills } 681d995685eSRichard Tran Mills #endif 682c9d46305SRichard Tran Mills 683c9d46305SRichard Tran Mills if(!aijmkl->no_SpMV2) { 684d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 685df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 686969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 687df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 688969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 689d995685eSRichard Tran Mills #endif 690c9d46305SRichard Tran Mills } else { 6914a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 692969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 6934a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 694969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 695c9d46305SRichard Tran Mills } 6964a2a386eSRichard Tran Mills 697db63039fSRichard Tran Mills B->ops->scale = MatScale_SeqAIJMKL; 69887c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 69987c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 70087c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJMKL; 701db63039fSRichard Tran Mills 702db63039fSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 7034a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 704e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 705e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 706e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 7074a2a386eSRichard Tran Mills 7084a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 7094a2a386eSRichard Tran Mills *newmat = B; 7104a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7114a2a386eSRichard Tran Mills } 7124a2a386eSRichard Tran Mills 7134a2a386eSRichard Tran Mills /*@C 7144a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 7154a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 7164a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 71790147e49SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 71890147e49SRichard Tran Mills operations are currently supported. 71990147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 72090147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 72190147e49SRichard Tran Mills 7224a2a386eSRichard Tran Mills Collective on MPI_Comm 7234a2a386eSRichard Tran Mills 7244a2a386eSRichard Tran Mills Input Parameters: 7254a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 7264a2a386eSRichard Tran Mills . m - number of rows 7274a2a386eSRichard Tran Mills . n - number of columns 7284a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 7294a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 7304a2a386eSRichard Tran Mills (possibly different for each row) or NULL 7314a2a386eSRichard Tran Mills 7324a2a386eSRichard Tran Mills Output Parameter: 7334a2a386eSRichard Tran Mills . A - the matrix 7344a2a386eSRichard Tran Mills 73590147e49SRichard Tran Mills Options Database Keys: 73690147e49SRichard Tran Mills . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 73790147e49SRichard Tran Mills 7384a2a386eSRichard Tran Mills Notes: 7394a2a386eSRichard Tran Mills If nnz is given then nz is ignored 7404a2a386eSRichard Tran Mills 7414a2a386eSRichard Tran Mills Level: intermediate 7424a2a386eSRichard Tran Mills 74390147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 7444a2a386eSRichard Tran Mills 7454a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 7464a2a386eSRichard Tran Mills @*/ 7474a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 7484a2a386eSRichard Tran Mills { 7494a2a386eSRichard Tran Mills PetscErrorCode ierr; 7504a2a386eSRichard Tran Mills 7514a2a386eSRichard Tran Mills PetscFunctionBegin; 7524a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 7534a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 7544a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 7554a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 7564a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7574a2a386eSRichard Tran Mills } 7584a2a386eSRichard Tran Mills 7594a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 7604a2a386eSRichard Tran Mills { 7614a2a386eSRichard Tran Mills PetscErrorCode ierr; 7624a2a386eSRichard Tran Mills 7634a2a386eSRichard Tran Mills PetscFunctionBegin; 7644a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 7654a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7664a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7674a2a386eSRichard Tran Mills } 768