17072be85SIrina Sokolova /* 27072be85SIrina Sokolova Defines basic operations for the MATSEQBAIJMKL matrix class. 37072be85SIrina Sokolova Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 47072be85SIrina Sokolova wherever possible. If used MKL verion is older than 11.3 PETSc default 57072be85SIrina Sokolova code for sparse matrix operations is used. 67072be85SIrina Sokolova */ 77072be85SIrina Sokolova 87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h> 97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10b9e7e5c1SBarry Smith #include <mkl_spblas.h> 117072be85SIrina Sokolova 12b9e7e5c1SBarry Smith static PetscBool PetscSeqBAIJSupportsZeroBased(void) 13b9e7e5c1SBarry Smith { 14b9e7e5c1SBarry Smith static PetscBool set = PETSC_FALSE,value; 15b9e7e5c1SBarry Smith int n=1,ia[1],ja[1]; 16b9e7e5c1SBarry Smith float a[1]; 17b9e7e5c1SBarry Smith sparse_status_t status; 18b9e7e5c1SBarry Smith sparse_matrix_t A; 197072be85SIrina Sokolova 20b9e7e5c1SBarry Smith if (!set) { 21b9e7e5c1SBarry Smith status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a); 22b9e7e5c1SBarry Smith value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 23b9e7e5c1SBarry Smith (void) mkl_sparse_destroy(A); 24b9e7e5c1SBarry Smith set = PETSC_TRUE; 25b9e7e5c1SBarry Smith } 26b9e7e5c1SBarry Smith return value; 27b9e7e5c1SBarry Smith } 287072be85SIrina Sokolova 297072be85SIrina Sokolova typedef struct { 307072be85SIrina Sokolova PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 317072be85SIrina Sokolova sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 327072be85SIrina Sokolova struct matrix_descr descr; 337072be85SIrina Sokolova PetscInt *ai1; 347072be85SIrina Sokolova PetscInt *aj1; 357072be85SIrina Sokolova } Mat_SeqBAIJMKL; 367072be85SIrina Sokolova 37b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 387072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 397072be85SIrina Sokolova 407072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 417072be85SIrina Sokolova { 427072be85SIrina Sokolova /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 437072be85SIrina Sokolova /* so we will ignore 'MatType type'. */ 447072be85SIrina Sokolova Mat B = *newmat; 457072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 467072be85SIrina Sokolova 477072be85SIrina Sokolova PetscFunctionBegin; 489566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 497072be85SIrina Sokolova 507072be85SIrina Sokolova /* Reset the original function pointers. */ 517072be85SIrina Sokolova B->ops->duplicate = MatDuplicate_SeqBAIJ; 527072be85SIrina Sokolova B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 537072be85SIrina Sokolova B->ops->destroy = MatDestroy_SeqBAIJ; 547072be85SIrina Sokolova B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 557072be85SIrina Sokolova B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 567072be85SIrina Sokolova B->ops->scale = MatScale_SeqBAIJ; 577072be85SIrina Sokolova B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 587072be85SIrina Sokolova B->ops->axpy = MatAXPY_SeqBAIJ; 597072be85SIrina Sokolova 607072be85SIrina Sokolova switch (A->rmap->bs) { 617072be85SIrina Sokolova case 1: 627072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_1; 637072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_1; 647072be85SIrina Sokolova break; 657072be85SIrina Sokolova case 2: 667072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_2; 677072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_2; 687072be85SIrina Sokolova break; 697072be85SIrina Sokolova case 3: 707072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_3; 717072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_3; 727072be85SIrina Sokolova break; 737072be85SIrina Sokolova case 4: 747072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_4; 757072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_4; 767072be85SIrina Sokolova break; 777072be85SIrina Sokolova case 5: 787072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_5; 797072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_5; 807072be85SIrina Sokolova break; 817072be85SIrina Sokolova case 6: 827072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_6; 837072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_6; 847072be85SIrina Sokolova break; 857072be85SIrina Sokolova case 7: 867072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_7; 877072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_7; 887072be85SIrina Sokolova break; 897072be85SIrina Sokolova case 11: 907072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_11; 917072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_11; 927072be85SIrina Sokolova break; 937072be85SIrina Sokolova case 15: 947072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_15_ver1; 957072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_N; 967072be85SIrina Sokolova break; 977072be85SIrina Sokolova default: 987072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_N; 997072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1007072be85SIrina Sokolova break; 1017072be85SIrina Sokolova } 1029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL)); 1037072be85SIrina Sokolova 1047072be85SIrina Sokolova /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 1057072be85SIrina Sokolova * simply involves destroying the MKL sparse matrix handle and then freeing 1067072be85SIrina Sokolova * the spptr pointer. */ 1077072be85SIrina Sokolova if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 1087072be85SIrina Sokolova 109*792fecdfSBarry Smith if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy,baijmkl->bsrA); 1109566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 1119566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 1127072be85SIrina Sokolova 1137072be85SIrina Sokolova /* Change the type of B to MATSEQBAIJ. */ 1149566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 1157072be85SIrina Sokolova 1167072be85SIrina Sokolova *newmat = B; 1177072be85SIrina Sokolova PetscFunctionReturn(0); 1187072be85SIrina Sokolova } 1197072be85SIrina Sokolova 120b9e7e5c1SBarry Smith static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 1217072be85SIrina Sokolova { 1227072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 1237072be85SIrina Sokolova 1247072be85SIrina Sokolova PetscFunctionBegin; 1257072be85SIrina Sokolova if (baijmkl) { 1267072be85SIrina Sokolova /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 127*792fecdfSBarry Smith if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy,baijmkl->bsrA); 1289566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 1299566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 1307072be85SIrina Sokolova } 1317072be85SIrina Sokolova 1327072be85SIrina Sokolova /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 1337072be85SIrina Sokolova * to destroy everything that remains. */ 1349566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 1359566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqBAIJ(A)); 1367072be85SIrina Sokolova PetscFunctionReturn(0); 1377072be85SIrina Sokolova } 1387072be85SIrina Sokolova 139b9e7e5c1SBarry Smith static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 1407072be85SIrina Sokolova { 1417072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1427072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 1430835cbf9SRichard Tran Mills PetscInt mbs, nbs, nz, bs; 1447072be85SIrina Sokolova MatScalar *aa; 1457072be85SIrina Sokolova PetscInt *aj,*ai; 14680278ffbSSatish Balay PetscInt i; 1477072be85SIrina Sokolova 1487072be85SIrina Sokolova PetscFunctionBegin; 1497072be85SIrina Sokolova if (baijmkl->sparse_optimized) { 1507072be85SIrina Sokolova /* Matrix has been previously assembled and optimized. Must destroy old 1517072be85SIrina Sokolova * matrix handle before running the optimization step again. */ 1529566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 1539566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 1547072be85SIrina Sokolova } 1557072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_FALSE; 1567072be85SIrina Sokolova 1577072be85SIrina Sokolova /* Now perform the SpMV2 setup and matrix optimization. */ 1587072be85SIrina Sokolova baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 1597072be85SIrina Sokolova baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 1607072be85SIrina Sokolova baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 1617072be85SIrina Sokolova mbs = a->mbs; 1627072be85SIrina Sokolova nbs = a->nbs; 1637072be85SIrina Sokolova nz = a->nz; 1647072be85SIrina Sokolova bs = A->rmap->bs; 1657072be85SIrina Sokolova aa = a->a; 1667072be85SIrina Sokolova 16780095d54SIrina Sokolova if ((nz!=0) & !(A->structure_only)) { 1687072be85SIrina Sokolova /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1697072be85SIrina Sokolova * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170b9e7e5c1SBarry Smith if (PetscSeqBAIJSupportsZeroBased()) { 1717072be85SIrina Sokolova aj = a->j; 1727072be85SIrina Sokolova ai = a->i; 1739566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa)); 174b9e7e5c1SBarry Smith } else { 1759566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs+1,&ai,nz,&aj)); 176b9e7e5c1SBarry Smith for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1; 177b9e7e5c1SBarry Smith for (i=0;i<nz;i++) aj[i] = a->j[i]+1; 1787072be85SIrina Sokolova aa = a->a; 1799566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa)); 1807072be85SIrina Sokolova baijmkl->ai1 = ai; 1817072be85SIrina Sokolova baijmkl->aj1 = aj; 182b9e7e5c1SBarry Smith } 1839566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000)); 1849566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE)); 1859566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 1867072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_TRUE; 1877072be85SIrina Sokolova } 1887072be85SIrina Sokolova PetscFunctionReturn(0); 1897072be85SIrina Sokolova } 1907072be85SIrina Sokolova 191b9e7e5c1SBarry Smith static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 1927072be85SIrina Sokolova { 1937072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl; 1947072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl_dest; 1957072be85SIrina Sokolova 1967072be85SIrina Sokolova PetscFunctionBegin; 1979566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqBAIJ(A,op,M)); 1987072be85SIrina Sokolova baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 1999566063dSJacob Faibussowitsch PetscCall(PetscNewLog((*M),&baijmkl_dest)); 20071bc03e0SIrina Sokolova (*M)->spptr = (void*)baijmkl_dest; 2019566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL))); 2027072be85SIrina Sokolova baijmkl_dest->sparse_optimized = PETSC_FALSE; 2039566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2047072be85SIrina Sokolova PetscFunctionReturn(0); 2057072be85SIrina Sokolova } 2067072be85SIrina Sokolova 207b9e7e5c1SBarry Smith static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 2087072be85SIrina Sokolova { 2097072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2107072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 2117072be85SIrina Sokolova const PetscScalar *x; 2127072be85SIrina Sokolova PetscScalar *y; 2137072be85SIrina Sokolova 2147072be85SIrina Sokolova PetscFunctionBegin; 2157072be85SIrina Sokolova /* If there are no nonzero entries, zero yy and return immediately. */ 2167072be85SIrina Sokolova if (!a->nz) { 2179566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 2187072be85SIrina Sokolova PetscFunctionReturn(0); 2197072be85SIrina Sokolova } 2207072be85SIrina Sokolova 2219566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2229566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2237072be85SIrina Sokolova 2247072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2257072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2267072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 2277072be85SIrina Sokolova if (!baijmkl->sparse_optimized) { 2289566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2297072be85SIrina Sokolova } 2307072be85SIrina Sokolova 2317072be85SIrina Sokolova /* Call MKL SpMV2 executor routine to do the MatMult. */ 2329566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y)); 2337072be85SIrina Sokolova 2349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs)); 2359566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2377072be85SIrina Sokolova PetscFunctionReturn(0); 2387072be85SIrina Sokolova } 2397072be85SIrina Sokolova 240b9e7e5c1SBarry Smith static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 2417072be85SIrina Sokolova { 2427072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2437072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 2447072be85SIrina Sokolova const PetscScalar *x; 2457072be85SIrina Sokolova PetscScalar *y; 2467072be85SIrina Sokolova 2477072be85SIrina Sokolova PetscFunctionBegin; 2487072be85SIrina Sokolova /* If there are no nonzero entries, zero yy and return immediately. */ 2497072be85SIrina Sokolova if (!a->nz) { 2509566063dSJacob Faibussowitsch PetscCall(VecSet(yy,0.0)); 2517072be85SIrina Sokolova PetscFunctionReturn(0); 2527072be85SIrina Sokolova } 2537072be85SIrina Sokolova 2549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2559566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 2567072be85SIrina Sokolova 2577072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2587072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2597072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 2607072be85SIrina Sokolova if (!baijmkl->sparse_optimized) { 2619566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2627072be85SIrina Sokolova } 2637072be85SIrina Sokolova 2647072be85SIrina Sokolova /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 2659566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y)); 2667072be85SIrina Sokolova 2679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs)); 2689566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 2699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 2707072be85SIrina Sokolova PetscFunctionReturn(0); 2717072be85SIrina Sokolova } 2727072be85SIrina Sokolova 273b9e7e5c1SBarry Smith static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 2747072be85SIrina Sokolova { 2757072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2767072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 2777072be85SIrina Sokolova const PetscScalar *x; 2787072be85SIrina Sokolova PetscScalar *y,*z; 2797072be85SIrina Sokolova PetscInt m=a->mbs*A->rmap->bs; 2807072be85SIrina Sokolova PetscInt i; 2817072be85SIrina Sokolova 2827072be85SIrina Sokolova PetscFunctionBegin; 2837072be85SIrina Sokolova /* If there are no nonzero entries, set zz = yy and return immediately. */ 2847072be85SIrina Sokolova if (!a->nz) { 2859566063dSJacob Faibussowitsch PetscCall(VecCopy(yy,zz)); 2867072be85SIrina Sokolova PetscFunctionReturn(0); 2877072be85SIrina Sokolova } 2887072be85SIrina Sokolova 2899566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 2909566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 2917072be85SIrina Sokolova 2927072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2937072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2947072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 2957072be85SIrina Sokolova if (!baijmkl->sparse_optimized) { 2969566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2977072be85SIrina Sokolova } 2987072be85SIrina Sokolova 2997072be85SIrina Sokolova /* Call MKL sparse BLAS routine to do the MatMult. */ 3007072be85SIrina Sokolova if (zz == yy) { 3017072be85SIrina Sokolova /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 3027072be85SIrina Sokolova * with alpha and beta both set to 1.0. */ 3039566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z)); 3047072be85SIrina Sokolova } else { 3057072be85SIrina Sokolova /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 3067072be85SIrina Sokolova * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 3079566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z)); 3087072be85SIrina Sokolova for (i=0; i<m; i++) { 3097072be85SIrina Sokolova z[i] += y[i]; 3107072be85SIrina Sokolova } 3117072be85SIrina Sokolova } 3127072be85SIrina Sokolova 3139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->bs2*a->nz)); 3149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 3167072be85SIrina Sokolova PetscFunctionReturn(0); 3177072be85SIrina Sokolova } 3187072be85SIrina Sokolova 319b9e7e5c1SBarry Smith static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 3207072be85SIrina Sokolova { 3217072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3227072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 3237072be85SIrina Sokolova const PetscScalar *x; 3247072be85SIrina Sokolova PetscScalar *y,*z; 3257072be85SIrina Sokolova PetscInt n=a->nbs*A->rmap->bs; 3267072be85SIrina Sokolova PetscInt i; 3277072be85SIrina Sokolova /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 3287072be85SIrina Sokolova 3297072be85SIrina Sokolova PetscFunctionBegin; 3307072be85SIrina Sokolova /* If there are no nonzero entries, set zz = yy and return immediately. */ 3317072be85SIrina Sokolova if (!a->nz) { 3329566063dSJacob Faibussowitsch PetscCall(VecCopy(yy,zz)); 3337072be85SIrina Sokolova PetscFunctionReturn(0); 3347072be85SIrina Sokolova } 3357072be85SIrina Sokolova 3369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3379566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 3387072be85SIrina Sokolova 3397072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3407072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3417072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3427072be85SIrina Sokolova if (!baijmkl->sparse_optimized) { 3439566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 3447072be85SIrina Sokolova } 3457072be85SIrina Sokolova 3467072be85SIrina Sokolova /* Call MKL sparse BLAS routine to do the MatMult. */ 3477072be85SIrina Sokolova if (zz == yy) { 3487072be85SIrina Sokolova /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 3497072be85SIrina Sokolova * with alpha and beta both set to 1.0. */ 3509566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z)); 3517072be85SIrina Sokolova } else { 3527072be85SIrina Sokolova /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 3537072be85SIrina Sokolova * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 3549566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z)); 3557072be85SIrina Sokolova for (i=0; i<n; i++) { 3567072be85SIrina Sokolova z[i] += y[i]; 3577072be85SIrina Sokolova } 3587072be85SIrina Sokolova } 3597072be85SIrina Sokolova 3609566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->bs2*a->nz)); 3619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 3637072be85SIrina Sokolova PetscFunctionReturn(0); 3647072be85SIrina Sokolova } 3657072be85SIrina Sokolova 366b9e7e5c1SBarry Smith static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 3677072be85SIrina Sokolova { 3687072be85SIrina Sokolova PetscFunctionBegin; 3699566063dSJacob Faibussowitsch PetscCall(MatScale_SeqBAIJ(inA,alpha)); 3709566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 3717072be85SIrina Sokolova PetscFunctionReturn(0); 3727072be85SIrina Sokolova } 3737072be85SIrina Sokolova 374b9e7e5c1SBarry Smith static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 3757072be85SIrina Sokolova { 3767072be85SIrina Sokolova PetscFunctionBegin; 3779566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale_SeqBAIJ(A,ll,rr)); 3789566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 3797072be85SIrina Sokolova PetscFunctionReturn(0); 3807072be85SIrina Sokolova } 3817072be85SIrina Sokolova 382b9e7e5c1SBarry Smith static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 3837072be85SIrina Sokolova { 3847072be85SIrina Sokolova PetscFunctionBegin; 3859566063dSJacob Faibussowitsch PetscCall(MatAXPY_SeqBAIJ(Y,a,X,str)); 3867072be85SIrina Sokolova if (str == SAME_NONZERO_PATTERN) { 3877072be85SIrina Sokolova /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 3889566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 3897072be85SIrina Sokolova } 3907072be85SIrina Sokolova PetscFunctionReturn(0); 3917072be85SIrina Sokolova } 3927072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 3937072be85SIrina Sokolova * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 3947072be85SIrina Sokolova * routine, but can also be used to convert an assembled SeqBAIJ matrix 3957072be85SIrina Sokolova * into a SeqBAIJMKL one. */ 3967072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 3977072be85SIrina Sokolova { 3987072be85SIrina Sokolova Mat B = *newmat; 3997072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl; 4007072be85SIrina Sokolova PetscBool sametype; 4017072be85SIrina Sokolova 4027072be85SIrina Sokolova PetscFunctionBegin; 4039566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 4047072be85SIrina Sokolova 4059566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 4067072be85SIrina Sokolova if (sametype) PetscFunctionReturn(0); 4077072be85SIrina Sokolova 4089566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B,&baijmkl)); 4097072be85SIrina Sokolova B->spptr = (void*)baijmkl; 4107072be85SIrina Sokolova 4117072be85SIrina Sokolova /* Set function pointers for methods that we inherit from BAIJ but override. 4127072be85SIrina Sokolova * We also parse some command line options below, since those determine some of the methods we point to. */ 4137072be85SIrina Sokolova B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 4147072be85SIrina Sokolova 4157072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_FALSE; 4167072be85SIrina Sokolova 4179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL)); 4189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ)); 4197072be85SIrina Sokolova 4209566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL)); 4217072be85SIrina Sokolova *newmat = B; 4227072be85SIrina Sokolova PetscFunctionReturn(0); 4237072be85SIrina Sokolova } 4249c46acdfSRichard Tran Mills 425b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 4264d6dccb5SIrina Sokolova { 4274d6dccb5SIrina Sokolova PetscFunctionBegin; 4284d6dccb5SIrina Sokolova if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 4299566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 4309566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 4314d6dccb5SIrina Sokolova A->ops->destroy = MatDestroy_SeqBAIJMKL; 4324d6dccb5SIrina Sokolova A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 4334d6dccb5SIrina Sokolova A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 4344d6dccb5SIrina Sokolova A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 4354d6dccb5SIrina Sokolova A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 4364d6dccb5SIrina Sokolova A->ops->scale = MatScale_SeqBAIJMKL; 4374d6dccb5SIrina Sokolova A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 4384d6dccb5SIrina Sokolova A->ops->axpy = MatAXPY_SeqBAIJMKL; 4394d6dccb5SIrina Sokolova A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 4404d6dccb5SIrina Sokolova PetscFunctionReturn(0); 4414d6dccb5SIrina Sokolova } 4429c46acdfSRichard Tran Mills 4437072be85SIrina Sokolova /*@C 4447072be85SIrina Sokolova MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 4457072be85SIrina Sokolova This type inherits from BAIJ and is largely identical, but uses sparse BLAS 4467072be85SIrina Sokolova routines from Intel MKL whenever possible. 4477072be85SIrina Sokolova MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 4487072be85SIrina Sokolova operations are currently supported. 4497072be85SIrina Sokolova If the installed version of MKL supports the "SpMV2" sparse 4507072be85SIrina Sokolova inspector-executor routines, then those are used by default. 4517072be85SIrina Sokolova Default PETSc kernels are used otherwise. 4527072be85SIrina Sokolova 4537072be85SIrina Sokolova Input Parameters: 4547072be85SIrina Sokolova + comm - MPI communicator, set to PETSC_COMM_SELF 4557072be85SIrina Sokolova . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 4567072be85SIrina Sokolova blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 4577072be85SIrina Sokolova . m - number of rows 4587072be85SIrina Sokolova . n - number of columns 4597072be85SIrina Sokolova . nz - number of nonzero blocks per block row (same for all rows) 4607072be85SIrina Sokolova - nnz - array containing the number of nonzero blocks in the various block rows 4617072be85SIrina Sokolova (possibly different for each block row) or NULL 4627072be85SIrina Sokolova 4637072be85SIrina Sokolova Output Parameter: 4647072be85SIrina Sokolova . A - the matrix 4657072be85SIrina Sokolova 4667072be85SIrina Sokolova It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 467f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 4687072be85SIrina Sokolova [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4697072be85SIrina Sokolova 4707072be85SIrina Sokolova Options Database Keys: 471a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 4727072be85SIrina Sokolova block calculations (much slower) 473a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 4747072be85SIrina Sokolova 4757072be85SIrina Sokolova Level: intermediate 4767072be85SIrina Sokolova 4777072be85SIrina Sokolova Notes: 4787072be85SIrina Sokolova The number of rows and columns must be divisible by blocksize. 4797072be85SIrina Sokolova 4807072be85SIrina Sokolova If the nnz parameter is given then the nz parameter is ignored 4817072be85SIrina Sokolova 4827072be85SIrina Sokolova A nonzero block is any block that as 1 or more nonzeros in it 4837072be85SIrina Sokolova 4847072be85SIrina Sokolova The block AIJ format is fully compatible with standard Fortran 77 4857072be85SIrina Sokolova storage. That is, the stored row and column indices can begin at 4867072be85SIrina Sokolova either one (as in Fortran) or zero. See the users' manual for details. 4877072be85SIrina Sokolova 4887072be85SIrina Sokolova Specify the preallocated storage with either nz or nnz (not both). 4897072be85SIrina Sokolova Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 4907072be85SIrina Sokolova allocation. See Users-Manual: ch_mat for details. 4917072be85SIrina Sokolova matrices. 4927072be85SIrina Sokolova 493db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 4947072be85SIrina Sokolova 4957072be85SIrina Sokolova @*/ 4967072be85SIrina Sokolova PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 4977072be85SIrina Sokolova { 4987072be85SIrina Sokolova PetscFunctionBegin; 4999566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 5009566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,m,n)); 5019566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSEQBAIJMKL)); 5029566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz)); 5037072be85SIrina Sokolova PetscFunctionReturn(0); 5047072be85SIrina Sokolova } 5059c46acdfSRichard Tran Mills 5067072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 5077072be85SIrina Sokolova { 5087072be85SIrina Sokolova PetscFunctionBegin; 5099566063dSJacob Faibussowitsch PetscCall(MatSetType(A,MATSEQBAIJ)); 5109566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A)); 5117072be85SIrina Sokolova PetscFunctionReturn(0); 5127072be85SIrina Sokolova } 513