xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision f4f49eeac7efa77fffa46b7ff95a3ed169f659ed)
17072be85SIrina Sokolova /*
27072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
37072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4da81f932SPierre Jolivet   wherever possible. If used MKL version is older than 11.3 PETSc default
57072be85SIrina Sokolova   code for sparse matrix operations is used.
67072be85SIrina Sokolova */
77072be85SIrina Sokolova 
87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11bdfea6dbSBarry Smith   #define MKL_ILP64
12bdfea6dbSBarry Smith #endif
13b9e7e5c1SBarry Smith #include <mkl_spblas.h>
147072be85SIrina Sokolova 
15d71ae5a4SJacob Faibussowitsch static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16d71ae5a4SJacob Faibussowitsch {
17b9e7e5c1SBarry Smith   static PetscBool set = PETSC_FALSE, value;
18b9e7e5c1SBarry Smith   int              n   = 1, ia[1], ja[1];
19b9e7e5c1SBarry Smith   float            a[1];
20b9e7e5c1SBarry Smith   sparse_status_t  status;
21b9e7e5c1SBarry Smith   sparse_matrix_t  A;
227072be85SIrina Sokolova 
23b9e7e5c1SBarry Smith   if (!set) {
24bdfea6dbSBarry Smith     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25b9e7e5c1SBarry Smith     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26b9e7e5c1SBarry Smith     (void)mkl_sparse_destroy(A);
27b9e7e5c1SBarry Smith     set = PETSC_TRUE;
28b9e7e5c1SBarry Smith   }
29b9e7e5c1SBarry Smith   return value;
30b9e7e5c1SBarry Smith }
317072be85SIrina Sokolova 
327072be85SIrina Sokolova typedef struct {
337072be85SIrina Sokolova   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
347072be85SIrina Sokolova   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
357072be85SIrina Sokolova   struct matrix_descr descr;
367072be85SIrina Sokolova   PetscInt           *ai1;
377072be85SIrina Sokolova   PetscInt           *aj1;
387072be85SIrina Sokolova } Mat_SeqBAIJMKL;
397072be85SIrina Sokolova 
40b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
417072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
427072be85SIrina Sokolova 
43d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
44d71ae5a4SJacob Faibussowitsch {
457072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
467072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
477072be85SIrina Sokolova   Mat             B       = *newmat;
487072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
497072be85SIrina Sokolova 
507072be85SIrina Sokolova   PetscFunctionBegin;
519566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
527072be85SIrina Sokolova 
537072be85SIrina Sokolova   /* Reset the original function pointers. */
547072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
557072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
567072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
577072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
587072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
597072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
607072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
617072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
627072be85SIrina Sokolova 
637072be85SIrina Sokolova   switch (A->rmap->bs) {
647072be85SIrina Sokolova   case 1:
657072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_1;
667072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
677072be85SIrina Sokolova     break;
687072be85SIrina Sokolova   case 2:
697072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_2;
707072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
717072be85SIrina Sokolova     break;
727072be85SIrina Sokolova   case 3:
737072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_3;
747072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
757072be85SIrina Sokolova     break;
767072be85SIrina Sokolova   case 4:
777072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_4;
787072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
797072be85SIrina Sokolova     break;
807072be85SIrina Sokolova   case 5:
817072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_5;
827072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
837072be85SIrina Sokolova     break;
847072be85SIrina Sokolova   case 6:
857072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_6;
867072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
877072be85SIrina Sokolova     break;
887072be85SIrina Sokolova   case 7:
897072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_7;
907072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
917072be85SIrina Sokolova     break;
927072be85SIrina Sokolova   case 11:
937072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_11;
947072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
957072be85SIrina Sokolova     break;
967072be85SIrina Sokolova   case 15:
977072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
987072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
997072be85SIrina Sokolova     break;
1007072be85SIrina Sokolova   default:
1017072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_N;
1027072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1037072be85SIrina Sokolova     break;
1047072be85SIrina Sokolova   }
1059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
1067072be85SIrina Sokolova 
1077072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
1087072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
1097072be85SIrina Sokolova    * the spptr pointer. */
1107072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
1117072be85SIrina Sokolova 
112792fecdfSBarry Smith   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1139566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1149566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
1157072be85SIrina Sokolova 
1167072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
1179566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
1187072be85SIrina Sokolova 
1197072be85SIrina Sokolova   *newmat = B;
1203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1217072be85SIrina Sokolova }
1227072be85SIrina Sokolova 
123d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
124d71ae5a4SJacob Faibussowitsch {
1257072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1267072be85SIrina Sokolova 
1277072be85SIrina Sokolova   PetscFunctionBegin;
1287072be85SIrina Sokolova   if (baijmkl) {
1297072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
130792fecdfSBarry Smith     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1319566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1329566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
1337072be85SIrina Sokolova   }
1347072be85SIrina Sokolova 
1357072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
1367072be85SIrina Sokolova    * to destroy everything that remains. */
1379566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
1389566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqBAIJ(A));
1393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1407072be85SIrina Sokolova }
1417072be85SIrina Sokolova 
142d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
143d71ae5a4SJacob Faibussowitsch {
1447072be85SIrina Sokolova   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
1457072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1460835cbf9SRichard Tran Mills   PetscInt        mbs, nbs, nz, bs;
1477072be85SIrina Sokolova   MatScalar      *aa;
1487072be85SIrina Sokolova   PetscInt       *aj, *ai;
14980278ffbSSatish Balay   PetscInt        i;
1507072be85SIrina Sokolova 
1517072be85SIrina Sokolova   PetscFunctionBegin;
1527072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1537072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
1547072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
1559566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1569566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
1577072be85SIrina Sokolova   }
1587072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
1597072be85SIrina Sokolova 
1607072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
1617072be85SIrina Sokolova   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
1627072be85SIrina Sokolova   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
1637072be85SIrina Sokolova   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
1647072be85SIrina Sokolova   mbs                 = a->mbs;
1657072be85SIrina Sokolova   nbs                 = a->nbs;
1667072be85SIrina Sokolova   nz                  = a->nz;
1677072be85SIrina Sokolova   bs                  = A->rmap->bs;
1687072be85SIrina Sokolova   aa                  = a->a;
1697072be85SIrina Sokolova 
170*f4f49eeaSPierre Jolivet   if ((nz != 0) & !A->structure_only) {
1717072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1727072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
173b9e7e5c1SBarry Smith     if (PetscSeqBAIJSupportsZeroBased()) {
1747072be85SIrina Sokolova       aj = a->j;
1757072be85SIrina Sokolova       ai = a->i;
176*f4f49eeaSPierre Jolivet       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
177b9e7e5c1SBarry Smith     } else {
1789566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
179b9e7e5c1SBarry Smith       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
180b9e7e5c1SBarry Smith       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
1817072be85SIrina Sokolova       aa = a->a;
18245d28df7SStefano Zampini       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
1837072be85SIrina Sokolova       baijmkl->ai1 = ai;
1847072be85SIrina Sokolova       baijmkl->aj1 = aj;
185b9e7e5c1SBarry Smith     }
1869566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
1879566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
1889566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
1897072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
1907072be85SIrina Sokolova   }
1913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1927072be85SIrina Sokolova }
1937072be85SIrina Sokolova 
194d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
195d71ae5a4SJacob Faibussowitsch {
1967072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
1977072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
1987072be85SIrina Sokolova 
1997072be85SIrina Sokolova   PetscFunctionBegin;
2009566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
2017072be85SIrina Sokolova   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2024dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl_dest));
20371bc03e0SIrina Sokolova   (*M)->spptr = (void *)baijmkl_dest;
2049566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
2057072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
2069566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2087072be85SIrina Sokolova }
2097072be85SIrina Sokolova 
210d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
211d71ae5a4SJacob Faibussowitsch {
2127072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2137072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2147072be85SIrina Sokolova   const PetscScalar *x;
2157072be85SIrina Sokolova   PetscScalar       *y;
2167072be85SIrina Sokolova 
2177072be85SIrina Sokolova   PetscFunctionBegin;
2187072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2197072be85SIrina Sokolova   if (!a->nz) {
2209566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2213ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2227072be85SIrina Sokolova   }
2237072be85SIrina Sokolova 
2249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2259566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2267072be85SIrina Sokolova 
2277072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2287072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2297072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
23048a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2317072be85SIrina Sokolova 
2327072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
2339566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2347072be85SIrina Sokolova 
2359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2397072be85SIrina Sokolova }
2407072be85SIrina Sokolova 
241d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
242d71ae5a4SJacob Faibussowitsch {
2437072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2447072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2457072be85SIrina Sokolova   const PetscScalar *x;
2467072be85SIrina Sokolova   PetscScalar       *y;
2477072be85SIrina Sokolova 
2487072be85SIrina Sokolova   PetscFunctionBegin;
2497072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2507072be85SIrina Sokolova   if (!a->nz) {
2519566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2523ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2537072be85SIrina Sokolova   }
2547072be85SIrina Sokolova 
2559566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2577072be85SIrina Sokolova 
2587072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2597072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2607072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
26148a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2627072be85SIrina Sokolova 
2637072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
2649566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2657072be85SIrina Sokolova 
2669566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2707072be85SIrina Sokolova }
2717072be85SIrina Sokolova 
272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
273d71ae5a4SJacob Faibussowitsch {
2747072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2757072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2767072be85SIrina Sokolova   const PetscScalar *x;
2777072be85SIrina Sokolova   PetscScalar       *y, *z;
2787072be85SIrina Sokolova   PetscInt           m = a->mbs * A->rmap->bs;
2797072be85SIrina Sokolova   PetscInt           i;
2807072be85SIrina Sokolova 
2817072be85SIrina Sokolova   PetscFunctionBegin;
2827072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
2837072be85SIrina Sokolova   if (!a->nz) {
2849566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
2853ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2867072be85SIrina Sokolova   }
2877072be85SIrina Sokolova 
2889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2899566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
2907072be85SIrina Sokolova 
2917072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2927072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2937072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
29448a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2957072be85SIrina Sokolova 
2967072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
2977072be85SIrina Sokolova   if (zz == yy) {
2987072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
2997072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
3009566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3017072be85SIrina Sokolova   } else {
3027072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3037072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3049566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
305ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
3067072be85SIrina Sokolova   }
3077072be85SIrina Sokolova 
3089566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3127072be85SIrina Sokolova }
3137072be85SIrina Sokolova 
314d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
315d71ae5a4SJacob Faibussowitsch {
3167072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
3177072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
3187072be85SIrina Sokolova   const PetscScalar *x;
3197072be85SIrina Sokolova   PetscScalar       *y, *z;
3207072be85SIrina Sokolova   PetscInt           n = a->nbs * A->rmap->bs;
3217072be85SIrina Sokolova   PetscInt           i;
3227072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
3237072be85SIrina Sokolova 
3247072be85SIrina Sokolova   PetscFunctionBegin;
3257072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3267072be85SIrina Sokolova   if (!a->nz) {
3279566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
3283ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
3297072be85SIrina Sokolova   }
3307072be85SIrina Sokolova 
3319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3329566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
3337072be85SIrina Sokolova 
3347072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3357072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3367072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
33748a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3387072be85SIrina Sokolova 
3397072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3407072be85SIrina Sokolova   if (zz == yy) {
3417072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3427072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
3439566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3447072be85SIrina Sokolova   } else {
3457072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3467072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3479566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
348ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
3497072be85SIrina Sokolova   }
3507072be85SIrina Sokolova 
3519566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3557072be85SIrina Sokolova }
3567072be85SIrina Sokolova 
357d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
358d71ae5a4SJacob Faibussowitsch {
3597072be85SIrina Sokolova   PetscFunctionBegin;
3609566063dSJacob Faibussowitsch   PetscCall(MatScale_SeqBAIJ(inA, alpha));
3619566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
3623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3637072be85SIrina Sokolova }
3647072be85SIrina Sokolova 
365d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
366d71ae5a4SJacob Faibussowitsch {
3677072be85SIrina Sokolova   PetscFunctionBegin;
3689566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
3699566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3717072be85SIrina Sokolova }
3727072be85SIrina Sokolova 
373d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
374d71ae5a4SJacob Faibussowitsch {
3757072be85SIrina Sokolova   PetscFunctionBegin;
3769566063dSJacob Faibussowitsch   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
3777072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
3787072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
3799566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
3807072be85SIrina Sokolova   }
3813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3827072be85SIrina Sokolova }
3837072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
3847072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
3857072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
3867072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
387d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
388d71ae5a4SJacob Faibussowitsch {
3897072be85SIrina Sokolova   Mat             B = *newmat;
3907072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
3917072be85SIrina Sokolova   PetscBool       sametype;
3927072be85SIrina Sokolova 
3937072be85SIrina Sokolova   PetscFunctionBegin;
3949566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
3957072be85SIrina Sokolova 
3969566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
3973ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
3987072be85SIrina Sokolova 
3994dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl));
4007072be85SIrina Sokolova   B->spptr = (void *)baijmkl;
4017072be85SIrina Sokolova 
4027072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
4037072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
4047072be85SIrina Sokolova   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
4057072be85SIrina Sokolova 
4067072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
4077072be85SIrina Sokolova 
40814e4dea2SJose E. Roman   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
4099566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
4107072be85SIrina Sokolova 
4119566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
4127072be85SIrina Sokolova   *newmat = B;
4133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4147072be85SIrina Sokolova }
4159c46acdfSRichard Tran Mills 
416d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
417d71ae5a4SJacob Faibussowitsch {
4184d6dccb5SIrina Sokolova   PetscFunctionBegin;
4193ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
4209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
4219566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
4224d6dccb5SIrina Sokolova   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
4234d6dccb5SIrina Sokolova   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
4244d6dccb5SIrina Sokolova   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
4254d6dccb5SIrina Sokolova   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
4264d6dccb5SIrina Sokolova   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
4274d6dccb5SIrina Sokolova   A->ops->scale            = MatScale_SeqBAIJMKL;
4284d6dccb5SIrina Sokolova   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
4294d6dccb5SIrina Sokolova   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
4304d6dccb5SIrina Sokolova   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
4313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4324d6dccb5SIrina Sokolova }
4339c46acdfSRichard Tran Mills 
4347072be85SIrina Sokolova /*@C
43511a5261eSBarry Smith   MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
43611a5261eSBarry Smith   This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
4377072be85SIrina Sokolova   routines from Intel MKL whenever possible.
4387072be85SIrina Sokolova 
4397072be85SIrina Sokolova   Input Parameters:
44011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
44120f4b53cSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
44220f4b53cSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
4437072be85SIrina Sokolova . m    - number of rows
4447072be85SIrina Sokolova . n    - number of columns
4457072be85SIrina Sokolova . nz   - number of nonzero blocks  per block row (same for all rows)
4467072be85SIrina Sokolova - nnz  - array containing the number of nonzero blocks in the various block rows
44720f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
4487072be85SIrina Sokolova 
4497072be85SIrina Sokolova   Output Parameter:
4507072be85SIrina Sokolova . A - the matrix
4517072be85SIrina Sokolova 
45211a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
453f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
45411a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
4557072be85SIrina Sokolova 
4567072be85SIrina Sokolova   Options Database Keys:
45711a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
458a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
4597072be85SIrina Sokolova 
4607072be85SIrina Sokolova   Level: intermediate
4617072be85SIrina Sokolova 
4627072be85SIrina Sokolova   Notes:
4637072be85SIrina Sokolova   The number of rows and columns must be divisible by blocksize.
4647072be85SIrina Sokolova 
46520f4b53cSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
4667072be85SIrina Sokolova 
4677072be85SIrina Sokolova   A nonzero block is any block that as 1 or more nonzeros in it
4687072be85SIrina Sokolova 
46920f4b53cSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
47020f4b53cSBarry Smith   operations are currently supported.
47120f4b53cSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
47220f4b53cSBarry Smith   inspector-executor routines, then those are used by default.
47320f4b53cSBarry Smith   Default PETSc kernels are used otherwise.
47420f4b53cSBarry Smith 
4752ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
4767072be85SIrina Sokolova   storage.  That is, the stored row and column indices can begin at
4777072be85SIrina Sokolova   either one (as in Fortran) or zero.  See the users' manual for details.
4787072be85SIrina Sokolova 
4797072be85SIrina Sokolova   Specify the preallocated storage with either nz or nnz (not both).
48011a5261eSBarry Smith   Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
481651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
4827072be85SIrina Sokolova   matrices.
4837072be85SIrina Sokolova 
484651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
4857072be85SIrina Sokolova @*/
486d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
487d71ae5a4SJacob Faibussowitsch {
4887072be85SIrina Sokolova   PetscFunctionBegin;
4899566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
4909566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
4919566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
4929566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
4933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4947072be85SIrina Sokolova }
4959c46acdfSRichard Tran Mills 
496d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
497d71ae5a4SJacob Faibussowitsch {
4987072be85SIrina Sokolova   PetscFunctionBegin;
4999566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQBAIJ));
5009566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
5013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5027072be85SIrina Sokolova }
503