xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
17072be85SIrina Sokolova /*
27072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
37072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
47072be85SIrina Sokolova   wherever possible. If used MKL verion is older than 11.3 PETSc default
57072be85SIrina Sokolova   code for sparse matrix operations is used.
67072be85SIrina Sokolova */
77072be85SIrina Sokolova 
87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10b9e7e5c1SBarry Smith #include <mkl_spblas.h>
117072be85SIrina Sokolova 
129371c9d4SSatish Balay static PetscBool PetscSeqBAIJSupportsZeroBased(void) {
13b9e7e5c1SBarry Smith   static PetscBool set = PETSC_FALSE, value;
14b9e7e5c1SBarry Smith   int              n   = 1, ia[1], ja[1];
15b9e7e5c1SBarry Smith   float            a[1];
16b9e7e5c1SBarry Smith   sparse_status_t  status;
17b9e7e5c1SBarry Smith   sparse_matrix_t  A;
187072be85SIrina Sokolova 
19b9e7e5c1SBarry Smith   if (!set) {
20b9e7e5c1SBarry Smith     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a);
21b9e7e5c1SBarry Smith     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
22b9e7e5c1SBarry Smith     (void)mkl_sparse_destroy(A);
23b9e7e5c1SBarry Smith     set = PETSC_TRUE;
24b9e7e5c1SBarry Smith   }
25b9e7e5c1SBarry Smith   return value;
26b9e7e5c1SBarry Smith }
277072be85SIrina Sokolova 
287072be85SIrina Sokolova typedef struct {
297072be85SIrina Sokolova   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
307072be85SIrina Sokolova   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
317072be85SIrina Sokolova   struct matrix_descr descr;
327072be85SIrina Sokolova   PetscInt           *ai1;
337072be85SIrina Sokolova   PetscInt           *aj1;
347072be85SIrina Sokolova } Mat_SeqBAIJMKL;
357072be85SIrina Sokolova 
36b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
377072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
387072be85SIrina Sokolova 
399371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
407072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
417072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
427072be85SIrina Sokolova   Mat             B       = *newmat;
437072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
447072be85SIrina Sokolova 
457072be85SIrina Sokolova   PetscFunctionBegin;
469566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
477072be85SIrina Sokolova 
487072be85SIrina Sokolova   /* Reset the original function pointers. */
497072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
507072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
517072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
527072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
537072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
547072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
557072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
567072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
577072be85SIrina Sokolova 
587072be85SIrina Sokolova   switch (A->rmap->bs) {
597072be85SIrina Sokolova   case 1:
607072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_1;
617072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
627072be85SIrina Sokolova     break;
637072be85SIrina Sokolova   case 2:
647072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_2;
657072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
667072be85SIrina Sokolova     break;
677072be85SIrina Sokolova   case 3:
687072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_3;
697072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
707072be85SIrina Sokolova     break;
717072be85SIrina Sokolova   case 4:
727072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_4;
737072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
747072be85SIrina Sokolova     break;
757072be85SIrina Sokolova   case 5:
767072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_5;
777072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
787072be85SIrina Sokolova     break;
797072be85SIrina Sokolova   case 6:
807072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_6;
817072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
827072be85SIrina Sokolova     break;
837072be85SIrina Sokolova   case 7:
847072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_7;
857072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
867072be85SIrina Sokolova     break;
877072be85SIrina Sokolova   case 11:
887072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_11;
897072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
907072be85SIrina Sokolova     break;
917072be85SIrina Sokolova   case 15:
927072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
937072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
947072be85SIrina Sokolova     break;
957072be85SIrina Sokolova   default:
967072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_N;
977072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
987072be85SIrina Sokolova     break;
997072be85SIrina Sokolova   }
1009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
1017072be85SIrina Sokolova 
1027072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
1037072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
1047072be85SIrina Sokolova    * the spptr pointer. */
1057072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
1067072be85SIrina Sokolova 
107792fecdfSBarry Smith   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1089566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1099566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
1107072be85SIrina Sokolova 
1117072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
1129566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
1137072be85SIrina Sokolova 
1147072be85SIrina Sokolova   *newmat = B;
1157072be85SIrina Sokolova   PetscFunctionReturn(0);
1167072be85SIrina Sokolova }
1177072be85SIrina Sokolova 
1189371c9d4SSatish Balay static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) {
1197072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1207072be85SIrina Sokolova 
1217072be85SIrina Sokolova   PetscFunctionBegin;
1227072be85SIrina Sokolova   if (baijmkl) {
1237072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
124792fecdfSBarry Smith     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1259566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1269566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
1277072be85SIrina Sokolova   }
1287072be85SIrina Sokolova 
1297072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
1307072be85SIrina Sokolova    * to destroy everything that remains. */
1319566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
1329566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqBAIJ(A));
1337072be85SIrina Sokolova   PetscFunctionReturn(0);
1347072be85SIrina Sokolova }
1357072be85SIrina Sokolova 
1369371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) {
1377072be85SIrina Sokolova   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
1387072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1390835cbf9SRichard Tran Mills   PetscInt        mbs, nbs, nz, bs;
1407072be85SIrina Sokolova   MatScalar      *aa;
1417072be85SIrina Sokolova   PetscInt       *aj, *ai;
14280278ffbSSatish Balay   PetscInt        i;
1437072be85SIrina Sokolova 
1447072be85SIrina Sokolova   PetscFunctionBegin;
1457072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1467072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
1477072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
1489566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1499566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
1507072be85SIrina Sokolova   }
1517072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
1527072be85SIrina Sokolova 
1537072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
1547072be85SIrina Sokolova   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
1557072be85SIrina Sokolova   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
1567072be85SIrina Sokolova   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
1577072be85SIrina Sokolova   mbs                 = a->mbs;
1587072be85SIrina Sokolova   nbs                 = a->nbs;
1597072be85SIrina Sokolova   nz                  = a->nz;
1607072be85SIrina Sokolova   bs                  = A->rmap->bs;
1617072be85SIrina Sokolova   aa                  = a->a;
1627072be85SIrina Sokolova 
16380095d54SIrina Sokolova   if ((nz != 0) & !(A->structure_only)) {
1647072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1657072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
166b9e7e5c1SBarry Smith     if (PetscSeqBAIJSupportsZeroBased()) {
1677072be85SIrina Sokolova       aj = a->j;
1687072be85SIrina Sokolova       ai = a->i;
1699566063dSJacob Faibussowitsch       PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
170b9e7e5c1SBarry Smith     } else {
1719566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
172b9e7e5c1SBarry Smith       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
173b9e7e5c1SBarry Smith       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
1747072be85SIrina Sokolova       aa = a->a;
1759566063dSJacob Faibussowitsch       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
1767072be85SIrina Sokolova       baijmkl->ai1 = ai;
1777072be85SIrina Sokolova       baijmkl->aj1 = aj;
178b9e7e5c1SBarry Smith     }
1799566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
1809566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
1819566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
1827072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
1837072be85SIrina Sokolova   }
1847072be85SIrina Sokolova   PetscFunctionReturn(0);
1857072be85SIrina Sokolova }
1867072be85SIrina Sokolova 
1879371c9d4SSatish Balay static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) {
1887072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
1897072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
1907072be85SIrina Sokolova 
1917072be85SIrina Sokolova   PetscFunctionBegin;
1929566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
1937072be85SIrina Sokolova   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1949566063dSJacob Faibussowitsch   PetscCall(PetscNewLog((*M), &baijmkl_dest));
19571bc03e0SIrina Sokolova   (*M)->spptr = (void *)baijmkl_dest;
1969566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
1977072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
1989566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
1997072be85SIrina Sokolova   PetscFunctionReturn(0);
2007072be85SIrina Sokolova }
2017072be85SIrina Sokolova 
2029371c9d4SSatish Balay static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
2037072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2047072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2057072be85SIrina Sokolova   const PetscScalar *x;
2067072be85SIrina Sokolova   PetscScalar       *y;
2077072be85SIrina Sokolova 
2087072be85SIrina Sokolova   PetscFunctionBegin;
2097072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2107072be85SIrina Sokolova   if (!a->nz) {
2119566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2127072be85SIrina Sokolova     PetscFunctionReturn(0);
2137072be85SIrina Sokolova   }
2147072be85SIrina Sokolova 
2159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2177072be85SIrina Sokolova 
2187072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2197072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2207072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
221*48a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2227072be85SIrina Sokolova 
2237072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
2249566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2257072be85SIrina Sokolova 
2269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2297072be85SIrina Sokolova   PetscFunctionReturn(0);
2307072be85SIrina Sokolova }
2317072be85SIrina Sokolova 
2329371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
2337072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2347072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2357072be85SIrina Sokolova   const PetscScalar *x;
2367072be85SIrina Sokolova   PetscScalar       *y;
2377072be85SIrina Sokolova 
2387072be85SIrina Sokolova   PetscFunctionBegin;
2397072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2407072be85SIrina Sokolova   if (!a->nz) {
2419566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2427072be85SIrina Sokolova     PetscFunctionReturn(0);
2437072be85SIrina Sokolova   }
2447072be85SIrina Sokolova 
2459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2477072be85SIrina Sokolova 
2487072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2497072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2507072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
251*48a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2527072be85SIrina Sokolova 
2537072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
2549566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2557072be85SIrina Sokolova 
2569566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2597072be85SIrina Sokolova   PetscFunctionReturn(0);
2607072be85SIrina Sokolova }
2617072be85SIrina Sokolova 
2629371c9d4SSatish Balay static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
2637072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2647072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2657072be85SIrina Sokolova   const PetscScalar *x;
2667072be85SIrina Sokolova   PetscScalar       *y, *z;
2677072be85SIrina Sokolova   PetscInt           m = a->mbs * A->rmap->bs;
2687072be85SIrina Sokolova   PetscInt           i;
2697072be85SIrina Sokolova 
2707072be85SIrina Sokolova   PetscFunctionBegin;
2717072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
2727072be85SIrina Sokolova   if (!a->nz) {
2739566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
2747072be85SIrina Sokolova     PetscFunctionReturn(0);
2757072be85SIrina Sokolova   }
2767072be85SIrina Sokolova 
2779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
2797072be85SIrina Sokolova 
2807072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2817072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2827072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
283*48a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2847072be85SIrina Sokolova 
2857072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
2867072be85SIrina Sokolova   if (zz == yy) {
2877072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
2887072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
2899566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
2907072be85SIrina Sokolova   } else {
2917072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
2927072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
2939566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
2949371c9d4SSatish Balay     for (i = 0; i < m; i++) { z[i] += y[i]; }
2957072be85SIrina Sokolova   }
2967072be85SIrina Sokolova 
2979566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
2989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3007072be85SIrina Sokolova   PetscFunctionReturn(0);
3017072be85SIrina Sokolova }
3027072be85SIrina Sokolova 
3039371c9d4SSatish Balay static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
3047072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
3057072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
3067072be85SIrina Sokolova   const PetscScalar *x;
3077072be85SIrina Sokolova   PetscScalar       *y, *z;
3087072be85SIrina Sokolova   PetscInt           n = a->nbs * A->rmap->bs;
3097072be85SIrina Sokolova   PetscInt           i;
3107072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
3117072be85SIrina Sokolova 
3127072be85SIrina Sokolova   PetscFunctionBegin;
3137072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3147072be85SIrina Sokolova   if (!a->nz) {
3159566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
3167072be85SIrina Sokolova     PetscFunctionReturn(0);
3177072be85SIrina Sokolova   }
3187072be85SIrina Sokolova 
3199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3209566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
3217072be85SIrina Sokolova 
3227072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3237072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3247072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
325*48a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3267072be85SIrina Sokolova 
3277072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3287072be85SIrina Sokolova   if (zz == yy) {
3297072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3307072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
3319566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3327072be85SIrina Sokolova   } else {
3337072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3347072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3359566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
3369371c9d4SSatish Balay     for (i = 0; i < n; i++) { z[i] += y[i]; }
3377072be85SIrina Sokolova   }
3387072be85SIrina Sokolova 
3399566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3427072be85SIrina Sokolova   PetscFunctionReturn(0);
3437072be85SIrina Sokolova }
3447072be85SIrina Sokolova 
3459371c9d4SSatish Balay static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) {
3467072be85SIrina Sokolova   PetscFunctionBegin;
3479566063dSJacob Faibussowitsch   PetscCall(MatScale_SeqBAIJ(inA, alpha));
3489566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
3497072be85SIrina Sokolova   PetscFunctionReturn(0);
3507072be85SIrina Sokolova }
3517072be85SIrina Sokolova 
3529371c9d4SSatish Balay static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) {
3537072be85SIrina Sokolova   PetscFunctionBegin;
3549566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
3559566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3567072be85SIrina Sokolova   PetscFunctionReturn(0);
3577072be85SIrina Sokolova }
3587072be85SIrina Sokolova 
3599371c9d4SSatish Balay static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) {
3607072be85SIrina Sokolova   PetscFunctionBegin;
3619566063dSJacob Faibussowitsch   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
3627072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
3637072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
3649566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
3657072be85SIrina Sokolova   }
3667072be85SIrina Sokolova   PetscFunctionReturn(0);
3677072be85SIrina Sokolova }
3687072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
3697072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
3707072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
3717072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
3729371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
3737072be85SIrina Sokolova   Mat             B = *newmat;
3747072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
3757072be85SIrina Sokolova   PetscBool       sametype;
3767072be85SIrina Sokolova 
3777072be85SIrina Sokolova   PetscFunctionBegin;
3789566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
3797072be85SIrina Sokolova 
3809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
3817072be85SIrina Sokolova   if (sametype) PetscFunctionReturn(0);
3827072be85SIrina Sokolova 
3839566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B, &baijmkl));
3847072be85SIrina Sokolova   B->spptr = (void *)baijmkl;
3857072be85SIrina Sokolova 
3867072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
3877072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
3887072be85SIrina Sokolova   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
3897072be85SIrina Sokolova 
3907072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
3917072be85SIrina Sokolova 
3929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL));
3939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
3947072be85SIrina Sokolova 
3959566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
3967072be85SIrina Sokolova   *newmat = B;
3977072be85SIrina Sokolova   PetscFunctionReturn(0);
3987072be85SIrina Sokolova }
3999c46acdfSRichard Tran Mills 
4009371c9d4SSatish Balay static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) {
4014d6dccb5SIrina Sokolova   PetscFunctionBegin;
4024d6dccb5SIrina Sokolova   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
4039566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
4049566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
4054d6dccb5SIrina Sokolova   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
4064d6dccb5SIrina Sokolova   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
4074d6dccb5SIrina Sokolova   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
4084d6dccb5SIrina Sokolova   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
4094d6dccb5SIrina Sokolova   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
4104d6dccb5SIrina Sokolova   A->ops->scale            = MatScale_SeqBAIJMKL;
4114d6dccb5SIrina Sokolova   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
4124d6dccb5SIrina Sokolova   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
4134d6dccb5SIrina Sokolova   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
4144d6dccb5SIrina Sokolova   PetscFunctionReturn(0);
4154d6dccb5SIrina Sokolova }
4169c46acdfSRichard Tran Mills 
4177072be85SIrina Sokolova /*@C
4187072be85SIrina Sokolova    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
4197072be85SIrina Sokolova    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
4207072be85SIrina Sokolova    routines from Intel MKL whenever possible.
4217072be85SIrina Sokolova    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
4227072be85SIrina Sokolova    operations are currently supported.
4237072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
4247072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
4257072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
4267072be85SIrina Sokolova 
4277072be85SIrina Sokolova    Input Parameters:
4287072be85SIrina Sokolova +  comm - MPI communicator, set to PETSC_COMM_SELF
4297072be85SIrina Sokolova .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
4307072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
4317072be85SIrina Sokolova .  m - number of rows
4327072be85SIrina Sokolova .  n - number of columns
4337072be85SIrina Sokolova .  nz - number of nonzero blocks  per block row (same for all rows)
4347072be85SIrina Sokolova -  nnz - array containing the number of nonzero blocks in the various block rows
4357072be85SIrina Sokolova          (possibly different for each block row) or NULL
4367072be85SIrina Sokolova 
4377072be85SIrina Sokolova    Output Parameter:
4387072be85SIrina Sokolova .  A - the matrix
4397072be85SIrina Sokolova 
4407072be85SIrina Sokolova    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
441f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4427072be85SIrina Sokolova    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4437072be85SIrina Sokolova 
4447072be85SIrina Sokolova    Options Database Keys:
445a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
4467072be85SIrina Sokolova                      block calculations (much slower)
447a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
4487072be85SIrina Sokolova 
4497072be85SIrina Sokolova    Level: intermediate
4507072be85SIrina Sokolova 
4517072be85SIrina Sokolova    Notes:
4527072be85SIrina Sokolova    The number of rows and columns must be divisible by blocksize.
4537072be85SIrina Sokolova 
4547072be85SIrina Sokolova    If the nnz parameter is given then the nz parameter is ignored
4557072be85SIrina Sokolova 
4567072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
4577072be85SIrina Sokolova 
4587072be85SIrina Sokolova    The block AIJ format is fully compatible with standard Fortran 77
4597072be85SIrina Sokolova    storage.  That is, the stored row and column indices can begin at
4607072be85SIrina Sokolova    either one (as in Fortran) or zero.  See the users' manual for details.
4617072be85SIrina Sokolova 
4627072be85SIrina Sokolova    Specify the preallocated storage with either nz or nnz (not both).
4637072be85SIrina Sokolova    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
4647072be85SIrina Sokolova    allocation.  See Users-Manual: ch_mat for details.
4657072be85SIrina Sokolova    matrices.
4667072be85SIrina Sokolova 
467db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
4687072be85SIrina Sokolova 
4697072be85SIrina Sokolova @*/
4709371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
4717072be85SIrina Sokolova   PetscFunctionBegin;
4729566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
4739566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
4749566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
4759566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
4767072be85SIrina Sokolova   PetscFunctionReturn(0);
4777072be85SIrina Sokolova }
4789c46acdfSRichard Tran Mills 
4799371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) {
4807072be85SIrina Sokolova   PetscFunctionBegin;
4819566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQBAIJ));
4829566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
4837072be85SIrina Sokolova   PetscFunctionReturn(0);
4847072be85SIrina Sokolova }
485