14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20df555b71SRichard Tran Mills struct matrix_descr descr; 21b8cbc1fbSRichard Tran Mills #endif 224a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 234a2a386eSRichard Tran Mills 244a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 254a2a386eSRichard Tran Mills 264a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 274a2a386eSRichard Tran Mills { 284a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 294a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 304a2a386eSRichard Tran Mills PetscErrorCode ierr; 314a2a386eSRichard Tran Mills Mat B = *newmat; 324a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 334a2a386eSRichard Tran Mills 344a2a386eSRichard Tran Mills PetscFunctionBegin; 354a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 364a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37e9c94282SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)B->spptr; 384a2a386eSRichard Tran Mills } 394a2a386eSRichard Tran Mills 404a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4154871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 424a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 434a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4454871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 45ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 4887c2a1d7SRichard Tran Mills B->ops->scale = MatScale_SeqAIJ; 4987c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 5087c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 5187c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJ; 524a2a386eSRichard Tran Mills 53e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57e9c94282SRichard Tran Mills 584abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 59e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 60e9c94282SRichard Tran Mills * the spptr pointer. */ 614abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 624abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 630632b357SRichard Tran Mills sparse_status_t stat; 644abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 654abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 664abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 674abfa3b3SRichard Tran Mills } 684abfa3b3SRichard Tran Mills } 694abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 70e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 714a2a386eSRichard Tran Mills 724a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 734a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 744a2a386eSRichard Tran Mills 754a2a386eSRichard Tran Mills *newmat = B; 764a2a386eSRichard Tran Mills PetscFunctionReturn(0); 774a2a386eSRichard Tran Mills } 784a2a386eSRichard Tran Mills 794a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 804a2a386eSRichard Tran Mills { 814a2a386eSRichard Tran Mills PetscErrorCode ierr; 824a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 834a2a386eSRichard Tran Mills 844a2a386eSRichard Tran Mills PetscFunctionBegin; 85e9c94282SRichard Tran Mills 86e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87e9c94282SRichard Tran Mills * spptr pointer. */ 88e9c94282SRichard Tran Mills if (aijmkl) { 894a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 904abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 914abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 924abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 934abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 944abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 954abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 964abfa3b3SRichard Tran Mills } 974abfa3b3SRichard Tran Mills } 984abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 994a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100e9c94282SRichard Tran Mills } 1014a2a386eSRichard Tran Mills 1024a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1034a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1044a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1054a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1064a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1074a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1084a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1094a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1104a2a386eSRichard Tran Mills } 1114a2a386eSRichard Tran Mills 1126e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1134a2a386eSRichard Tran Mills { 1144a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 11658678438SRichard Tran Mills PetscInt m,n; 1176e369cd5SRichard Tran Mills MatScalar *aa; 118df555b71SRichard Tran Mills PetscInt *aj,*ai; 1194a2a386eSRichard Tran Mills 1206e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1216e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1226e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1236e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 1246e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1256e369cd5SRichard Tran Mills #else 1264a2a386eSRichard Tran Mills 1276e369cd5SRichard Tran Mills sparse_status_t stat; 1284a2a386eSRichard Tran Mills 129df555b71SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 1306e369cd5SRichard Tran Mills 1316e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1326e369cd5SRichard Tran Mills 1330632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1340632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1350632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1360632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1370632b357SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1380632b357SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1390632b357SRichard Tran Mills } 1400632b357SRichard Tran Mills } 1418d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1426e369cd5SRichard Tran Mills 143c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 144df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14758678438SRichard Tran Mills m = A->rmap->n; 14858678438SRichard Tran Mills n = A->cmap->n; 149df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 151df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1528d3fe1b0SRichard Tran Mills if (a->nz) { 1538d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1548d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 15558678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 159df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 160f68ad4bdSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 162df555b71SRichard Tran Mills } 1634abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 164c9d46305SRichard Tran Mills } 1656e369cd5SRichard Tran Mills 1666e369cd5SRichard Tran Mills PetscFunctionReturn(0); 167d995685eSRichard Tran Mills #endif 1686e369cd5SRichard Tran Mills } 1696e369cd5SRichard Tran Mills 1706e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 1716e369cd5SRichard Tran Mills { 1726e369cd5SRichard Tran Mills PetscErrorCode ierr; 1736e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1746e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 1756e369cd5SRichard Tran Mills 1766e369cd5SRichard Tran Mills PetscFunctionBegin; 1776e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 1786e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 1796e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 1806e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 1816e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 1826e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 1836e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1846e369cd5SRichard Tran Mills } 1856e369cd5SRichard Tran Mills 1866e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 1876e369cd5SRichard Tran Mills { 1886e369cd5SRichard Tran Mills PetscErrorCode ierr; 1896e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1906e369cd5SRichard Tran Mills 1916e369cd5SRichard Tran Mills PetscFunctionBegin; 1926e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1936e369cd5SRichard Tran Mills 1946e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 1956e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 1966e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 1976e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 198*d96e85feSRichard Tran Mills * a lot of code duplication. */ 1996e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 2006e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 2016e369cd5SRichard Tran Mills 2026e369cd5SRichard Tran Mills /* Now create the MKL sparse handle (if needed; the function checks). */ 2036e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 204df555b71SRichard Tran Mills 2054a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2064a2a386eSRichard Tran Mills } 2074a2a386eSRichard Tran Mills 2084a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 2094a2a386eSRichard Tran Mills { 2104a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2114a2a386eSRichard Tran Mills const PetscScalar *x; 2124a2a386eSRichard Tran Mills PetscScalar *y; 2134a2a386eSRichard Tran Mills const MatScalar *aa; 2144a2a386eSRichard Tran Mills PetscErrorCode ierr; 2154a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 216db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 217db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 218db63039fSRichard Tran Mills PetscScalar beta = 0.0; 2194a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 220db63039fSRichard Tran Mills char matdescra[6]; 221db63039fSRichard Tran Mills 2224a2a386eSRichard Tran Mills 2234a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 224ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 225ff03dc53SRichard Tran Mills 226ff03dc53SRichard Tran Mills PetscFunctionBegin; 227db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 228db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 229ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 230ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 232ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 233ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 234ff03dc53SRichard Tran Mills 235ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 236db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 237ff03dc53SRichard Tran Mills 238ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 239ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 240ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 241ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 242ff03dc53SRichard Tran Mills } 243ff03dc53SRichard Tran Mills 244d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 245df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 246df555b71SRichard Tran Mills { 247df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 249df555b71SRichard Tran Mills const PetscScalar *x; 250df555b71SRichard Tran Mills PetscScalar *y; 251df555b71SRichard Tran Mills PetscErrorCode ierr; 252df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 253df555b71SRichard Tran Mills 254df555b71SRichard Tran Mills PetscFunctionBegin; 255df555b71SRichard Tran Mills 2568d3fe1b0SRichard Tran Mills /* If there are no nonzero entries, this is a no-op: return immediately. */ 2578d3fe1b0SRichard Tran Mills if(!a->nz) PetscFunctionReturn(0); 258f36dfe3fSRichard Tran Mills 259df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 260df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 261df555b71SRichard Tran Mills 2623fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2633fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2643fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 2653fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 2663fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 2673fa15762SRichard Tran Mills } 2683fa15762SRichard Tran Mills 269df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 270df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 271df555b71SRichard Tran Mills 272df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 273df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 274df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 275df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 276df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 277df555b71SRichard Tran Mills } 278df555b71SRichard Tran Mills PetscFunctionReturn(0); 279df555b71SRichard Tran Mills } 280d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 281df555b71SRichard Tran Mills 282ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 283ff03dc53SRichard Tran Mills { 284ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 285ff03dc53SRichard Tran Mills const PetscScalar *x; 286ff03dc53SRichard Tran Mills PetscScalar *y; 287ff03dc53SRichard Tran Mills const MatScalar *aa; 288ff03dc53SRichard Tran Mills PetscErrorCode ierr; 289ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 290db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 291db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 292db63039fSRichard Tran Mills PetscScalar beta = 0.0; 293ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 294db63039fSRichard Tran Mills char matdescra[6]; 295ff03dc53SRichard Tran Mills 296ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 297ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 2984a2a386eSRichard Tran Mills 2994a2a386eSRichard Tran Mills PetscFunctionBegin; 300969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 301969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3024a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3034a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 3044a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 3054a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 3064a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 3074a2a386eSRichard Tran Mills 3084a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 309db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 3104a2a386eSRichard Tran Mills 3114a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 3124a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3134a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 3144a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3154a2a386eSRichard Tran Mills } 3164a2a386eSRichard Tran Mills 317d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 318df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 319df555b71SRichard Tran Mills { 320df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 321df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 322df555b71SRichard Tran Mills const PetscScalar *x; 323df555b71SRichard Tran Mills PetscScalar *y; 324df555b71SRichard Tran Mills PetscErrorCode ierr; 3250632b357SRichard Tran Mills sparse_status_t stat; 326df555b71SRichard Tran Mills 327df555b71SRichard Tran Mills PetscFunctionBegin; 328df555b71SRichard Tran Mills 3298d3fe1b0SRichard Tran Mills /* If there are no nonzero entries, this is a no-op: return immediately. */ 3308d3fe1b0SRichard Tran Mills if(!a->nz) PetscFunctionReturn(0); 331f36dfe3fSRichard Tran Mills 332df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 333df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 334df555b71SRichard Tran Mills 3353fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3363fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3373fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3383fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 3393fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3403fa15762SRichard Tran Mills } 3413fa15762SRichard Tran Mills 342df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 343df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 344df555b71SRichard Tran Mills 345df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 346df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 347df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 348df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 349df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 350df555b71SRichard Tran Mills } 351df555b71SRichard Tran Mills PetscFunctionReturn(0); 352df555b71SRichard Tran Mills } 353d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 354df555b71SRichard Tran Mills 3554a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 3564a2a386eSRichard Tran Mills { 3574a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3584a2a386eSRichard Tran Mills const PetscScalar *x; 3594a2a386eSRichard Tran Mills PetscScalar *y,*z; 3604a2a386eSRichard Tran Mills const MatScalar *aa; 3614a2a386eSRichard Tran Mills PetscErrorCode ierr; 3624a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 363db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 3644a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 3654a2a386eSRichard Tran Mills PetscInt i; 3664a2a386eSRichard Tran Mills 367ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 368ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 369a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 370db63039fSRichard Tran Mills PetscScalar beta; 371a84739b8SRichard Tran Mills char matdescra[6]; 372ff03dc53SRichard Tran Mills 373ff03dc53SRichard Tran Mills PetscFunctionBegin; 374a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 375a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 376a84739b8SRichard Tran Mills 377ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 378ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 379ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 380ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 381ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 382ff03dc53SRichard Tran Mills 383ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 384a84739b8SRichard Tran Mills if (zz == yy) { 385a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 386db63039fSRichard Tran Mills beta = 1.0; 387db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 388a84739b8SRichard Tran Mills } else { 389db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 390db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 391db63039fSRichard Tran Mills beta = 0.0; 392db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 393ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 394ff03dc53SRichard Tran Mills z[i] += y[i]; 395ff03dc53SRichard Tran Mills } 396a84739b8SRichard Tran Mills } 397ff03dc53SRichard Tran Mills 398ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 399ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 400ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 401ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 402ff03dc53SRichard Tran Mills } 403ff03dc53SRichard Tran Mills 404d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 405df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 406df555b71SRichard Tran Mills { 407df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 409df555b71SRichard Tran Mills const PetscScalar *x; 410df555b71SRichard Tran Mills PetscScalar *y,*z; 411df555b71SRichard Tran Mills PetscErrorCode ierr; 412df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 413df555b71SRichard Tran Mills PetscInt i; 414df555b71SRichard Tran Mills 415df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 416df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 417df555b71SRichard Tran Mills 418df555b71SRichard Tran Mills PetscFunctionBegin; 419df555b71SRichard Tran Mills 4208d3fe1b0SRichard Tran Mills /* If there are no nonzero entries, this is a no-op: return immediately. */ 4218d3fe1b0SRichard Tran Mills if(!a->nz) PetscFunctionReturn(0); 422df555b71SRichard Tran Mills 423df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 424df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 425df555b71SRichard Tran Mills 4263fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4273fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4283fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4293fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 4303fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4313fa15762SRichard Tran Mills } 4323fa15762SRichard Tran Mills 433df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 434df555b71SRichard Tran Mills if (zz == yy) { 435df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 436df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 437db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 438df555b71SRichard Tran Mills } else { 439df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 440df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 441db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 442df555b71SRichard Tran Mills for (i=0; i<m; i++) { 443df555b71SRichard Tran Mills z[i] += y[i]; 444df555b71SRichard Tran Mills } 445df555b71SRichard Tran Mills } 446df555b71SRichard Tran Mills 447df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 448df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 449df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 450df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 451df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 452df555b71SRichard Tran Mills } 453df555b71SRichard Tran Mills PetscFunctionReturn(0); 454df555b71SRichard Tran Mills } 455d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 456df555b71SRichard Tran Mills 457ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 458ff03dc53SRichard Tran Mills { 459ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 460ff03dc53SRichard Tran Mills const PetscScalar *x; 461ff03dc53SRichard Tran Mills PetscScalar *y,*z; 462ff03dc53SRichard Tran Mills const MatScalar *aa; 463ff03dc53SRichard Tran Mills PetscErrorCode ierr; 464ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 465db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 466ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 467ff03dc53SRichard Tran Mills PetscInt i; 468ff03dc53SRichard Tran Mills 469ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 470ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 471a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 472db63039fSRichard Tran Mills PetscScalar beta; 473a84739b8SRichard Tran Mills char matdescra[6]; 4744a2a386eSRichard Tran Mills 4754a2a386eSRichard Tran Mills PetscFunctionBegin; 476a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 477a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 478a84739b8SRichard Tran Mills 4794a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4804a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 4814a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4824a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4834a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4844a2a386eSRichard Tran Mills 4854a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 486a84739b8SRichard Tran Mills if (zz == yy) { 487a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 488db63039fSRichard Tran Mills beta = 1.0; 489969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 490a84739b8SRichard Tran Mills } else { 491db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 492db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 493db63039fSRichard Tran Mills beta = 0.0; 494db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 495969800c5SRichard Tran Mills for (i=0; i<n; i++) { 4964a2a386eSRichard Tran Mills z[i] += y[i]; 4974a2a386eSRichard Tran Mills } 498a84739b8SRichard Tran Mills } 4994a2a386eSRichard Tran Mills 5004a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 5014a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5024a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 5034a2a386eSRichard Tran Mills PetscFunctionReturn(0); 5044a2a386eSRichard Tran Mills } 5054a2a386eSRichard Tran Mills 506d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 507df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 508df555b71SRichard Tran Mills { 509df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 510df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 511df555b71SRichard Tran Mills const PetscScalar *x; 512df555b71SRichard Tran Mills PetscScalar *y,*z; 513df555b71SRichard Tran Mills PetscErrorCode ierr; 514969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 515df555b71SRichard Tran Mills PetscInt i; 516df555b71SRichard Tran Mills 517df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 518df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 519df555b71SRichard Tran Mills 520df555b71SRichard Tran Mills PetscFunctionBegin; 521df555b71SRichard Tran Mills 5228d3fe1b0SRichard Tran Mills /* If there are no nonzero entries, this is a no-op: return immediately. */ 5238d3fe1b0SRichard Tran Mills if(!a->nz) PetscFunctionReturn(0); 524f36dfe3fSRichard Tran Mills 525df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 527df555b71SRichard Tran Mills 5283fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5293fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5303fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5313fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 5323fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5333fa15762SRichard Tran Mills } 5343fa15762SRichard Tran Mills 535df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 536df555b71SRichard Tran Mills if (zz == yy) { 537df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 538df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 539db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 540df555b71SRichard Tran Mills } else { 541df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 542df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 543db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 544969800c5SRichard Tran Mills for (i=0; i<n; i++) { 545df555b71SRichard Tran Mills z[i] += y[i]; 546df555b71SRichard Tran Mills } 547df555b71SRichard Tran Mills } 548df555b71SRichard Tran Mills 549df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 550df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 551df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 552df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 553df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 554df555b71SRichard Tran Mills } 555df555b71SRichard Tran Mills PetscFunctionReturn(0); 556df555b71SRichard Tran Mills } 557d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 558df555b71SRichard Tran Mills 55987c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 560db63039fSRichard Tran Mills { 561db63039fSRichard Tran Mills PetscErrorCode ierr; 562db63039fSRichard Tran Mills 56387c2a1d7SRichard Tran Mills PetscFunctionBegin; 564db63039fSRichard Tran Mills ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 565db63039fSRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 566db63039fSRichard Tran Mills PetscFunctionReturn(0); 567db63039fSRichard Tran Mills } 568df555b71SRichard Tran Mills 56987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 57087c2a1d7SRichard Tran Mills { 57187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 57287c2a1d7SRichard Tran Mills 57387c2a1d7SRichard Tran Mills PetscFunctionBegin; 57487c2a1d7SRichard Tran Mills ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 57587c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 57687c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 57787c2a1d7SRichard Tran Mills } 57887c2a1d7SRichard Tran Mills 57987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 58087c2a1d7SRichard Tran Mills { 58187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 58287c2a1d7SRichard Tran Mills 58387c2a1d7SRichard Tran Mills PetscFunctionBegin; 58487c2a1d7SRichard Tran Mills ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 58587c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 58687c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 58787c2a1d7SRichard Tran Mills } 58887c2a1d7SRichard Tran Mills 58987c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 59087c2a1d7SRichard Tran Mills { 59187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 59287c2a1d7SRichard Tran Mills 59387c2a1d7SRichard Tran Mills PetscFunctionBegin; 59487c2a1d7SRichard Tran Mills ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 59587c2a1d7SRichard Tran Mills if (str == SAME_NONZERO_PATTERN) { 59687c2a1d7SRichard Tran Mills /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 59787c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 59887c2a1d7SRichard Tran Mills } 59987c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 60087c2a1d7SRichard Tran Mills } 60187c2a1d7SRichard Tran Mills 6024a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 6034a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 6044a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 6054a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 6064a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 6074a2a386eSRichard Tran Mills { 6084a2a386eSRichard Tran Mills PetscErrorCode ierr; 6094a2a386eSRichard Tran Mills Mat B = *newmat; 6104a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 611c9d46305SRichard Tran Mills PetscBool set; 612e9c94282SRichard Tran Mills PetscBool sametype; 6134a2a386eSRichard Tran Mills 6144a2a386eSRichard Tran Mills PetscFunctionBegin; 6154a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 6164a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 6174a2a386eSRichard Tran Mills } 6184a2a386eSRichard Tran Mills 619e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 620e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 621e9c94282SRichard Tran Mills 6224a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 6234a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 6244a2a386eSRichard Tran Mills 625df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 626969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 6274a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 6284a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 6294a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 630c9d46305SRichard Tran Mills 6314abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 632d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 633d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 634d995685eSRichard Tran Mills #elif 635d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 636d995685eSRichard Tran Mills #endif 6374abfa3b3SRichard Tran Mills 6384abfa3b3SRichard Tran Mills /* Parse command line options. */ 639c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 640c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 641c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 642d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 643d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 644d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 645d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 646d995685eSRichard Tran Mills } 647d995685eSRichard Tran Mills #endif 648c9d46305SRichard Tran Mills 649c9d46305SRichard Tran Mills if(!aijmkl->no_SpMV2) { 650d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 651df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 652969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 653df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 654969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 655d995685eSRichard Tran Mills #endif 656c9d46305SRichard Tran Mills } else { 6574a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 658969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 6594a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 660969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 661c9d46305SRichard Tran Mills } 6624a2a386eSRichard Tran Mills 663db63039fSRichard Tran Mills B->ops->scale = MatScale_SeqAIJMKL; 66487c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 66587c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 66687c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJMKL; 667db63039fSRichard Tran Mills 668db63039fSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 6694a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 670e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 671e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 672e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 6734a2a386eSRichard Tran Mills 6744a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 6754a2a386eSRichard Tran Mills *newmat = B; 6764a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6774a2a386eSRichard Tran Mills } 6784a2a386eSRichard Tran Mills 6794a2a386eSRichard Tran Mills /*@C 6804a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 6814a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 6824a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 68390147e49SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 68490147e49SRichard Tran Mills operations are currently supported. 68590147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 68690147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 68790147e49SRichard Tran Mills 6884a2a386eSRichard Tran Mills Collective on MPI_Comm 6894a2a386eSRichard Tran Mills 6904a2a386eSRichard Tran Mills Input Parameters: 6914a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 6924a2a386eSRichard Tran Mills . m - number of rows 6934a2a386eSRichard Tran Mills . n - number of columns 6944a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 6954a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 6964a2a386eSRichard Tran Mills (possibly different for each row) or NULL 6974a2a386eSRichard Tran Mills 6984a2a386eSRichard Tran Mills Output Parameter: 6994a2a386eSRichard Tran Mills . A - the matrix 7004a2a386eSRichard Tran Mills 70190147e49SRichard Tran Mills Options Database Keys: 70290147e49SRichard Tran Mills . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 70390147e49SRichard Tran Mills 7044a2a386eSRichard Tran Mills Notes: 7054a2a386eSRichard Tran Mills If nnz is given then nz is ignored 7064a2a386eSRichard Tran Mills 7074a2a386eSRichard Tran Mills Level: intermediate 7084a2a386eSRichard Tran Mills 70990147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 7104a2a386eSRichard Tran Mills 7114a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 7124a2a386eSRichard Tran Mills @*/ 7134a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 7144a2a386eSRichard Tran Mills { 7154a2a386eSRichard Tran Mills PetscErrorCode ierr; 7164a2a386eSRichard Tran Mills 7174a2a386eSRichard Tran Mills PetscFunctionBegin; 7184a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 7194a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 7204a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 7214a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 7224a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7234a2a386eSRichard Tran Mills } 7244a2a386eSRichard Tran Mills 7254a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 7264a2a386eSRichard Tran Mills { 7274a2a386eSRichard Tran Mills PetscErrorCode ierr; 7284a2a386eSRichard Tran Mills 7294a2a386eSRichard Tran Mills PetscFunctionBegin; 7304a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 7314a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7324a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7334a2a386eSRichard Tran Mills } 734