xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
17072be85SIrina Sokolova /*
27072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
37072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
47072be85SIrina Sokolova   wherever possible. If used MKL verion is older than 11.3 PETSc default
57072be85SIrina Sokolova   code for sparse matrix operations is used.
67072be85SIrina Sokolova */
77072be85SIrina Sokolova 
87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10b9e7e5c1SBarry Smith #include <mkl_spblas.h>
117072be85SIrina Sokolova 
12*d71ae5a4SJacob Faibussowitsch static PetscBool PetscSeqBAIJSupportsZeroBased(void)
13*d71ae5a4SJacob Faibussowitsch {
14b9e7e5c1SBarry Smith   static PetscBool set = PETSC_FALSE, value;
15b9e7e5c1SBarry Smith   int              n   = 1, ia[1], ja[1];
16b9e7e5c1SBarry Smith   float            a[1];
17b9e7e5c1SBarry Smith   sparse_status_t  status;
18b9e7e5c1SBarry Smith   sparse_matrix_t  A;
197072be85SIrina Sokolova 
20b9e7e5c1SBarry Smith   if (!set) {
21b9e7e5c1SBarry Smith     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a);
22b9e7e5c1SBarry Smith     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
23b9e7e5c1SBarry Smith     (void)mkl_sparse_destroy(A);
24b9e7e5c1SBarry Smith     set = PETSC_TRUE;
25b9e7e5c1SBarry Smith   }
26b9e7e5c1SBarry Smith   return value;
27b9e7e5c1SBarry Smith }
287072be85SIrina Sokolova 
297072be85SIrina Sokolova typedef struct {
307072be85SIrina Sokolova   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
317072be85SIrina Sokolova   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
327072be85SIrina Sokolova   struct matrix_descr descr;
337072be85SIrina Sokolova   PetscInt           *ai1;
347072be85SIrina Sokolova   PetscInt           *aj1;
357072be85SIrina Sokolova } Mat_SeqBAIJMKL;
367072be85SIrina Sokolova 
37b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
387072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
397072be85SIrina Sokolova 
40*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
41*d71ae5a4SJacob Faibussowitsch {
427072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
437072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
447072be85SIrina Sokolova   Mat             B       = *newmat;
457072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
467072be85SIrina Sokolova 
477072be85SIrina Sokolova   PetscFunctionBegin;
489566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
497072be85SIrina Sokolova 
507072be85SIrina Sokolova   /* Reset the original function pointers. */
517072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
527072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
537072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
547072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
557072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
567072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
577072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
587072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
597072be85SIrina Sokolova 
607072be85SIrina Sokolova   switch (A->rmap->bs) {
617072be85SIrina Sokolova   case 1:
627072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_1;
637072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
647072be85SIrina Sokolova     break;
657072be85SIrina Sokolova   case 2:
667072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_2;
677072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
687072be85SIrina Sokolova     break;
697072be85SIrina Sokolova   case 3:
707072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_3;
717072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
727072be85SIrina Sokolova     break;
737072be85SIrina Sokolova   case 4:
747072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_4;
757072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
767072be85SIrina Sokolova     break;
777072be85SIrina Sokolova   case 5:
787072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_5;
797072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
807072be85SIrina Sokolova     break;
817072be85SIrina Sokolova   case 6:
827072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_6;
837072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
847072be85SIrina Sokolova     break;
857072be85SIrina Sokolova   case 7:
867072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_7;
877072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
887072be85SIrina Sokolova     break;
897072be85SIrina Sokolova   case 11:
907072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_11;
917072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
927072be85SIrina Sokolova     break;
937072be85SIrina Sokolova   case 15:
947072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
957072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
967072be85SIrina Sokolova     break;
977072be85SIrina Sokolova   default:
987072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_N;
997072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1007072be85SIrina Sokolova     break;
1017072be85SIrina Sokolova   }
1029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
1037072be85SIrina Sokolova 
1047072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
1057072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
1067072be85SIrina Sokolova    * the spptr pointer. */
1077072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
1087072be85SIrina Sokolova 
109792fecdfSBarry Smith   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1109566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1119566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
1127072be85SIrina Sokolova 
1137072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
1149566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
1157072be85SIrina Sokolova 
1167072be85SIrina Sokolova   *newmat = B;
1177072be85SIrina Sokolova   PetscFunctionReturn(0);
1187072be85SIrina Sokolova }
1197072be85SIrina Sokolova 
120*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
121*d71ae5a4SJacob Faibussowitsch {
1227072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1237072be85SIrina Sokolova 
1247072be85SIrina Sokolova   PetscFunctionBegin;
1257072be85SIrina Sokolova   if (baijmkl) {
1267072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127792fecdfSBarry Smith     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1289566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1299566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
1307072be85SIrina Sokolova   }
1317072be85SIrina Sokolova 
1327072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
1337072be85SIrina Sokolova    * to destroy everything that remains. */
1349566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
1359566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqBAIJ(A));
1367072be85SIrina Sokolova   PetscFunctionReturn(0);
1377072be85SIrina Sokolova }
1387072be85SIrina Sokolova 
139*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
140*d71ae5a4SJacob Faibussowitsch {
1417072be85SIrina Sokolova   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
1427072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1430835cbf9SRichard Tran Mills   PetscInt        mbs, nbs, nz, bs;
1447072be85SIrina Sokolova   MatScalar      *aa;
1457072be85SIrina Sokolova   PetscInt       *aj, *ai;
14680278ffbSSatish Balay   PetscInt        i;
1477072be85SIrina Sokolova 
1487072be85SIrina Sokolova   PetscFunctionBegin;
1497072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1507072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
1517072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
1529566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1539566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
1547072be85SIrina Sokolova   }
1557072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
1567072be85SIrina Sokolova 
1577072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
1587072be85SIrina Sokolova   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
1597072be85SIrina Sokolova   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
1607072be85SIrina Sokolova   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
1617072be85SIrina Sokolova   mbs                 = a->mbs;
1627072be85SIrina Sokolova   nbs                 = a->nbs;
1637072be85SIrina Sokolova   nz                  = a->nz;
1647072be85SIrina Sokolova   bs                  = A->rmap->bs;
1657072be85SIrina Sokolova   aa                  = a->a;
1667072be85SIrina Sokolova 
16780095d54SIrina Sokolova   if ((nz != 0) & !(A->structure_only)) {
1687072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1697072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170b9e7e5c1SBarry Smith     if (PetscSeqBAIJSupportsZeroBased()) {
1717072be85SIrina Sokolova       aj = a->j;
1727072be85SIrina Sokolova       ai = a->i;
1739566063dSJacob Faibussowitsch       PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
174b9e7e5c1SBarry Smith     } else {
1759566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
176b9e7e5c1SBarry Smith       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
177b9e7e5c1SBarry Smith       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
1787072be85SIrina Sokolova       aa = a->a;
1799566063dSJacob Faibussowitsch       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
1807072be85SIrina Sokolova       baijmkl->ai1 = ai;
1817072be85SIrina Sokolova       baijmkl->aj1 = aj;
182b9e7e5c1SBarry Smith     }
1839566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
1849566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
1859566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
1867072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
1877072be85SIrina Sokolova   }
1887072be85SIrina Sokolova   PetscFunctionReturn(0);
1897072be85SIrina Sokolova }
1907072be85SIrina Sokolova 
191*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
192*d71ae5a4SJacob Faibussowitsch {
1937072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
1947072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
1957072be85SIrina Sokolova 
1967072be85SIrina Sokolova   PetscFunctionBegin;
1979566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
1987072be85SIrina Sokolova   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1994dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl_dest));
20071bc03e0SIrina Sokolova   (*M)->spptr = (void *)baijmkl_dest;
2019566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
2027072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
2039566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2047072be85SIrina Sokolova   PetscFunctionReturn(0);
2057072be85SIrina Sokolova }
2067072be85SIrina Sokolova 
207*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
208*d71ae5a4SJacob Faibussowitsch {
2097072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2107072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2117072be85SIrina Sokolova   const PetscScalar *x;
2127072be85SIrina Sokolova   PetscScalar       *y;
2137072be85SIrina Sokolova 
2147072be85SIrina Sokolova   PetscFunctionBegin;
2157072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2167072be85SIrina Sokolova   if (!a->nz) {
2179566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2187072be85SIrina Sokolova     PetscFunctionReturn(0);
2197072be85SIrina Sokolova   }
2207072be85SIrina Sokolova 
2219566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2229566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2237072be85SIrina Sokolova 
2247072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2257072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2267072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
22748a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2287072be85SIrina Sokolova 
2297072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
2309566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2317072be85SIrina Sokolova 
2329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2357072be85SIrina Sokolova   PetscFunctionReturn(0);
2367072be85SIrina Sokolova }
2377072be85SIrina Sokolova 
238*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
239*d71ae5a4SJacob Faibussowitsch {
2407072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2417072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2427072be85SIrina Sokolova   const PetscScalar *x;
2437072be85SIrina Sokolova   PetscScalar       *y;
2447072be85SIrina Sokolova 
2457072be85SIrina Sokolova   PetscFunctionBegin;
2467072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2477072be85SIrina Sokolova   if (!a->nz) {
2489566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2497072be85SIrina Sokolova     PetscFunctionReturn(0);
2507072be85SIrina Sokolova   }
2517072be85SIrina Sokolova 
2529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2547072be85SIrina Sokolova 
2557072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2567072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2577072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
25848a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2597072be85SIrina Sokolova 
2607072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
2619566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2627072be85SIrina Sokolova 
2639566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2667072be85SIrina Sokolova   PetscFunctionReturn(0);
2677072be85SIrina Sokolova }
2687072be85SIrina Sokolova 
269*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
270*d71ae5a4SJacob Faibussowitsch {
2717072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2727072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2737072be85SIrina Sokolova   const PetscScalar *x;
2747072be85SIrina Sokolova   PetscScalar       *y, *z;
2757072be85SIrina Sokolova   PetscInt           m = a->mbs * A->rmap->bs;
2767072be85SIrina Sokolova   PetscInt           i;
2777072be85SIrina Sokolova 
2787072be85SIrina Sokolova   PetscFunctionBegin;
2797072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
2807072be85SIrina Sokolova   if (!a->nz) {
2819566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
2827072be85SIrina Sokolova     PetscFunctionReturn(0);
2837072be85SIrina Sokolova   }
2847072be85SIrina Sokolova 
2859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
2877072be85SIrina Sokolova 
2887072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2897072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2907072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
29148a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2927072be85SIrina Sokolova 
2937072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
2947072be85SIrina Sokolova   if (zz == yy) {
2957072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
2967072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
2979566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
2987072be85SIrina Sokolova   } else {
2997072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3007072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3019566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
302ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
3037072be85SIrina Sokolova   }
3047072be85SIrina Sokolova 
3059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3087072be85SIrina Sokolova   PetscFunctionReturn(0);
3097072be85SIrina Sokolova }
3107072be85SIrina Sokolova 
311*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
312*d71ae5a4SJacob Faibussowitsch {
3137072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
3147072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
3157072be85SIrina Sokolova   const PetscScalar *x;
3167072be85SIrina Sokolova   PetscScalar       *y, *z;
3177072be85SIrina Sokolova   PetscInt           n = a->nbs * A->rmap->bs;
3187072be85SIrina Sokolova   PetscInt           i;
3197072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
3207072be85SIrina Sokolova 
3217072be85SIrina Sokolova   PetscFunctionBegin;
3227072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3237072be85SIrina Sokolova   if (!a->nz) {
3249566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
3257072be85SIrina Sokolova     PetscFunctionReturn(0);
3267072be85SIrina Sokolova   }
3277072be85SIrina Sokolova 
3289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3299566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
3307072be85SIrina Sokolova 
3317072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3327072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3337072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
33448a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3357072be85SIrina Sokolova 
3367072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3377072be85SIrina Sokolova   if (zz == yy) {
3387072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3397072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
3409566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3417072be85SIrina Sokolova   } else {
3427072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3437072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3449566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
345ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
3467072be85SIrina Sokolova   }
3477072be85SIrina Sokolova 
3489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3517072be85SIrina Sokolova   PetscFunctionReturn(0);
3527072be85SIrina Sokolova }
3537072be85SIrina Sokolova 
354*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
355*d71ae5a4SJacob Faibussowitsch {
3567072be85SIrina Sokolova   PetscFunctionBegin;
3579566063dSJacob Faibussowitsch   PetscCall(MatScale_SeqBAIJ(inA, alpha));
3589566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
3597072be85SIrina Sokolova   PetscFunctionReturn(0);
3607072be85SIrina Sokolova }
3617072be85SIrina Sokolova 
362*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
363*d71ae5a4SJacob Faibussowitsch {
3647072be85SIrina Sokolova   PetscFunctionBegin;
3659566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
3669566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3677072be85SIrina Sokolova   PetscFunctionReturn(0);
3687072be85SIrina Sokolova }
3697072be85SIrina Sokolova 
370*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
371*d71ae5a4SJacob Faibussowitsch {
3727072be85SIrina Sokolova   PetscFunctionBegin;
3739566063dSJacob Faibussowitsch   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
3747072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
3757072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
3769566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
3777072be85SIrina Sokolova   }
3787072be85SIrina Sokolova   PetscFunctionReturn(0);
3797072be85SIrina Sokolova }
3807072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
3817072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
3827072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
3837072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
384*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
385*d71ae5a4SJacob Faibussowitsch {
3867072be85SIrina Sokolova   Mat             B = *newmat;
3877072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
3887072be85SIrina Sokolova   PetscBool       sametype;
3897072be85SIrina Sokolova 
3907072be85SIrina Sokolova   PetscFunctionBegin;
3919566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
3927072be85SIrina Sokolova 
3939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
3947072be85SIrina Sokolova   if (sametype) PetscFunctionReturn(0);
3957072be85SIrina Sokolova 
3964dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl));
3977072be85SIrina Sokolova   B->spptr = (void *)baijmkl;
3987072be85SIrina Sokolova 
3997072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
4007072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
4017072be85SIrina Sokolova   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
4027072be85SIrina Sokolova 
4037072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
4047072be85SIrina Sokolova 
4059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL));
4069566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
4077072be85SIrina Sokolova 
4089566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
4097072be85SIrina Sokolova   *newmat = B;
4107072be85SIrina Sokolova   PetscFunctionReturn(0);
4117072be85SIrina Sokolova }
4129c46acdfSRichard Tran Mills 
413*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
414*d71ae5a4SJacob Faibussowitsch {
4154d6dccb5SIrina Sokolova   PetscFunctionBegin;
4164d6dccb5SIrina Sokolova   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
4179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
4189566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
4194d6dccb5SIrina Sokolova   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
4204d6dccb5SIrina Sokolova   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
4214d6dccb5SIrina Sokolova   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
4224d6dccb5SIrina Sokolova   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
4234d6dccb5SIrina Sokolova   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
4244d6dccb5SIrina Sokolova   A->ops->scale            = MatScale_SeqBAIJMKL;
4254d6dccb5SIrina Sokolova   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
4264d6dccb5SIrina Sokolova   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
4274d6dccb5SIrina Sokolova   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
4284d6dccb5SIrina Sokolova   PetscFunctionReturn(0);
4294d6dccb5SIrina Sokolova }
4309c46acdfSRichard Tran Mills 
4317072be85SIrina Sokolova /*@C
43211a5261eSBarry Smith    MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
43311a5261eSBarry Smith    This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
4347072be85SIrina Sokolova    routines from Intel MKL whenever possible.
43511a5261eSBarry Smith    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
4367072be85SIrina Sokolova    operations are currently supported.
4377072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
4387072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
4397072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
4407072be85SIrina Sokolova 
4417072be85SIrina Sokolova    Input Parameters:
44211a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
4437072be85SIrina Sokolova .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
4447072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
4457072be85SIrina Sokolova .  m - number of rows
4467072be85SIrina Sokolova .  n - number of columns
4477072be85SIrina Sokolova .  nz - number of nonzero blocks  per block row (same for all rows)
4487072be85SIrina Sokolova -  nnz - array containing the number of nonzero blocks in the various block rows
4497072be85SIrina Sokolova          (possibly different for each block row) or NULL
4507072be85SIrina Sokolova 
4517072be85SIrina Sokolova    Output Parameter:
4527072be85SIrina Sokolova .  A - the matrix
4537072be85SIrina Sokolova 
45411a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
455f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
45611a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
4577072be85SIrina Sokolova 
4587072be85SIrina Sokolova    Options Database Keys:
45911a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
460a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
4617072be85SIrina Sokolova 
4627072be85SIrina Sokolova    Level: intermediate
4637072be85SIrina Sokolova 
4647072be85SIrina Sokolova    Notes:
4657072be85SIrina Sokolova    The number of rows and columns must be divisible by blocksize.
4667072be85SIrina Sokolova 
4677072be85SIrina Sokolova    If the nnz parameter is given then the nz parameter is ignored
4687072be85SIrina Sokolova 
4697072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
4707072be85SIrina Sokolova 
47111a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
4727072be85SIrina Sokolova    storage.  That is, the stored row and column indices can begin at
4737072be85SIrina Sokolova    either one (as in Fortran) or zero.  See the users' manual for details.
4747072be85SIrina Sokolova 
4757072be85SIrina Sokolova    Specify the preallocated storage with either nz or nnz (not both).
47611a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
477651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
4787072be85SIrina Sokolova    matrices.
4797072be85SIrina Sokolova 
480651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
4817072be85SIrina Sokolova @*/
482*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
483*d71ae5a4SJacob Faibussowitsch {
4847072be85SIrina Sokolova   PetscFunctionBegin;
4859566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
4869566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
4879566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
4889566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
4897072be85SIrina Sokolova   PetscFunctionReturn(0);
4907072be85SIrina Sokolova }
4919c46acdfSRichard Tran Mills 
492*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
493*d71ae5a4SJacob Faibussowitsch {
4947072be85SIrina Sokolova   PetscFunctionBegin;
4959566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQBAIJ));
4969566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
4977072be85SIrina Sokolova   PetscFunctionReturn(0);
4987072be85SIrina Sokolova }
499