xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision b9e7e5c1f1fe158a0312e1538d69d2999b53c077)
17072be85SIrina Sokolova /*
27072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
37072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
47072be85SIrina Sokolova   wherever possible. If used MKL verion is older than 11.3 PETSc default
57072be85SIrina Sokolova   code for sparse matrix operations is used.
67072be85SIrina Sokolova */
77072be85SIrina Sokolova 
87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10*b9e7e5c1SBarry Smith #include <mkl_spblas.h>
117072be85SIrina Sokolova 
12*b9e7e5c1SBarry Smith static PetscBool PetscSeqBAIJSupportsZeroBased(void)
13*b9e7e5c1SBarry Smith {
14*b9e7e5c1SBarry Smith   static PetscBool set = PETSC_FALSE,value;
15*b9e7e5c1SBarry Smith   int              n=1,ia[1],ja[1];
16*b9e7e5c1SBarry Smith   float            a[1];
17*b9e7e5c1SBarry Smith   sparse_status_t  status;
18*b9e7e5c1SBarry Smith   sparse_matrix_t  A;
197072be85SIrina Sokolova 
20*b9e7e5c1SBarry Smith   if (!set) {
21*b9e7e5c1SBarry Smith     status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a);
22*b9e7e5c1SBarry Smith     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
23*b9e7e5c1SBarry Smith     (void)   mkl_sparse_destroy(A);
24*b9e7e5c1SBarry Smith     set    = PETSC_TRUE;
25*b9e7e5c1SBarry Smith   }
26*b9e7e5c1SBarry Smith   return value;
27*b9e7e5c1SBarry Smith }
287072be85SIrina Sokolova 
297072be85SIrina Sokolova typedef struct {
307072be85SIrina Sokolova   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
317072be85SIrina Sokolova   sparse_matrix_t     bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
327072be85SIrina Sokolova   struct matrix_descr descr;
337072be85SIrina Sokolova   PetscInt            *ai1;
347072be85SIrina Sokolova   PetscInt            *aj1;
357072be85SIrina Sokolova } Mat_SeqBAIJMKL;
367072be85SIrina Sokolova 
37*b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
387072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
397072be85SIrina Sokolova 
407072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
417072be85SIrina Sokolova {
427072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
437072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
447072be85SIrina Sokolova   PetscErrorCode ierr;
457072be85SIrina Sokolova   Mat            B        = *newmat;
467072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
477072be85SIrina Sokolova 
487072be85SIrina Sokolova   PetscFunctionBegin;
497072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
507072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
517072be85SIrina Sokolova   }
527072be85SIrina Sokolova 
537072be85SIrina Sokolova   /* Reset the original function pointers. */
547072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
557072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
567072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
577072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
587072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
597072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
607072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
617072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
627072be85SIrina Sokolova 
637072be85SIrina Sokolova   switch (A->rmap->bs) {
647072be85SIrina Sokolova     case 1:
657072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_1;
667072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
677072be85SIrina Sokolova       break;
687072be85SIrina Sokolova     case 2:
697072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_2;
707072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
717072be85SIrina Sokolova       break;
727072be85SIrina Sokolova     case 3:
737072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_3;
747072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
757072be85SIrina Sokolova       break;
767072be85SIrina Sokolova     case 4:
777072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_4;
787072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
797072be85SIrina Sokolova       break;
807072be85SIrina Sokolova     case 5:
817072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_5;
827072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
837072be85SIrina Sokolova       break;
847072be85SIrina Sokolova     case 6:
857072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_6;
867072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
877072be85SIrina Sokolova       break;
887072be85SIrina Sokolova     case 7:
897072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_7;
907072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
917072be85SIrina Sokolova       break;
927072be85SIrina Sokolova     case 11:
937072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_11;
947072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
957072be85SIrina Sokolova       break;
967072be85SIrina Sokolova     case 15:
977072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
987072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
997072be85SIrina Sokolova       break;
1007072be85SIrina Sokolova     default:
1017072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_N;
1027072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1037072be85SIrina Sokolova       break;
1047072be85SIrina Sokolova   }
1057072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
1067072be85SIrina Sokolova 
1077072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
1087072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
1097072be85SIrina Sokolova    * the spptr pointer. */
1107072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
1117072be85SIrina Sokolova 
1127072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1137072be85SIrina Sokolova     sparse_status_t stat;
1147072be85SIrina Sokolova     stat = mkl_sparse_destroy(baijmkl->bsrA);
1159c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1167072be85SIrina Sokolova   }
117*b9e7e5c1SBarry Smith   ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr);
1187072be85SIrina Sokolova   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
1197072be85SIrina Sokolova 
1207072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
1217072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
1227072be85SIrina Sokolova 
1237072be85SIrina Sokolova   *newmat = B;
1247072be85SIrina Sokolova   PetscFunctionReturn(0);
1257072be85SIrina Sokolova }
1267072be85SIrina Sokolova 
127*b9e7e5c1SBarry Smith static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
1287072be85SIrina Sokolova {
1297072be85SIrina Sokolova   PetscErrorCode ierr;
1307072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
1317072be85SIrina Sokolova 
1327072be85SIrina Sokolova   PetscFunctionBegin;
1337072be85SIrina Sokolova   if (baijmkl) {
1347072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
1357072be85SIrina Sokolova     if (baijmkl->sparse_optimized) {
1367072be85SIrina Sokolova       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1377072be85SIrina Sokolova       stat = mkl_sparse_destroy(baijmkl->bsrA);
1389c46acdfSRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1397072be85SIrina Sokolova     }
140*b9e7e5c1SBarry Smith     ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr);
1417072be85SIrina Sokolova     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
1427072be85SIrina Sokolova   }
1437072be85SIrina Sokolova 
1447072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
1457072be85SIrina Sokolova    * to destroy everything that remains. */
1467072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
1477072be85SIrina Sokolova   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
1487072be85SIrina Sokolova   PetscFunctionReturn(0);
1497072be85SIrina Sokolova }
1507072be85SIrina Sokolova 
151*b9e7e5c1SBarry Smith static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
1527072be85SIrina Sokolova {
1537072be85SIrina Sokolova   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1547072be85SIrina Sokolova   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
1550835cbf9SRichard Tran Mills   PetscInt        mbs, nbs, nz, bs;
1567072be85SIrina Sokolova   MatScalar       *aa;
1577072be85SIrina Sokolova   PetscInt        *aj,*ai;
1587072be85SIrina Sokolova   sparse_status_t stat;
1590835cbf9SRichard Tran Mills   PetscErrorCode  ierr;
16080278ffbSSatish Balay   PetscInt        i;
1617072be85SIrina Sokolova 
1627072be85SIrina Sokolova   PetscFunctionBegin;
1637072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1647072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
1657072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
166*b9e7e5c1SBarry Smith     ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr);
167017c2882SIrina Sokolova     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
1687072be85SIrina Sokolova   }
1697072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
1707072be85SIrina Sokolova 
1717072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
1727072be85SIrina Sokolova   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
1737072be85SIrina Sokolova   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
1747072be85SIrina Sokolova   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
1757072be85SIrina Sokolova   mbs = a->mbs;
1767072be85SIrina Sokolova   nbs = a->nbs;
1777072be85SIrina Sokolova   nz  = a->nz;
1787072be85SIrina Sokolova   bs  = A->rmap->bs;
1797072be85SIrina Sokolova   aa  = a->a;
1807072be85SIrina Sokolova 
18180095d54SIrina Sokolova   if ((nz!=0) & !(A->structure_only)) {
1827072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1837072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184*b9e7e5c1SBarry Smith     if (PetscSeqBAIJSupportsZeroBased()) {
1857072be85SIrina Sokolova       aj   = a->j;
1867072be85SIrina Sokolova       ai   = a->i;
187017c2882SIrina Sokolova       stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
188*b9e7e5c1SBarry Smith     } else {
189*b9e7e5c1SBarry Smith       ierr = PetscMalloc2(mbs+1,&ai,nz,&aj);CHKERRQ(ierr);
190*b9e7e5c1SBarry Smith       for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1;
191*b9e7e5c1SBarry Smith       for (i=0;i<nz;i++) aj[i] = a->j[i]+1;
1927072be85SIrina Sokolova       aa   = a->a;
193017c2882SIrina Sokolova       stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
1947072be85SIrina Sokolova       baijmkl->ai1 = ai;
1957072be85SIrina Sokolova       baijmkl->aj1 = aj;
196*b9e7e5c1SBarry Smith     }
197017c2882SIrina Sokolova     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
198017c2882SIrina Sokolova     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
199017c2882SIrina Sokolova     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
2007072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
2017072be85SIrina Sokolova   }
2027072be85SIrina Sokolova   PetscFunctionReturn(0);
2037072be85SIrina Sokolova }
2047072be85SIrina Sokolova 
205*b9e7e5c1SBarry Smith static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2067072be85SIrina Sokolova {
2077072be85SIrina Sokolova   PetscErrorCode ierr;
2087072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
2097072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
2107072be85SIrina Sokolova 
2117072be85SIrina Sokolova   PetscFunctionBegin;
2127072be85SIrina Sokolova   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
2137072be85SIrina Sokolova   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
21471bc03e0SIrina Sokolova   ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr);
21571bc03e0SIrina Sokolova   (*M)->spptr = (void*)baijmkl_dest;
2167072be85SIrina Sokolova   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
2177072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
2187072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2197072be85SIrina Sokolova   PetscFunctionReturn(0);
2207072be85SIrina Sokolova }
2217072be85SIrina Sokolova 
222*b9e7e5c1SBarry Smith static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
2237072be85SIrina Sokolova {
2247072be85SIrina Sokolova   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
2257072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
2267072be85SIrina Sokolova   const PetscScalar  *x;
2277072be85SIrina Sokolova   PetscScalar        *y;
2287072be85SIrina Sokolova   PetscErrorCode     ierr;
2297072be85SIrina Sokolova   sparse_status_t    stat = SPARSE_STATUS_SUCCESS;
2307072be85SIrina Sokolova 
2317072be85SIrina Sokolova   PetscFunctionBegin;
2327072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2337072be85SIrina Sokolova   if (!a->nz) {
234*b9e7e5c1SBarry Smith     ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2357072be85SIrina Sokolova     PetscFunctionReturn(0);
2367072be85SIrina Sokolova   }
2377072be85SIrina Sokolova 
2387072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2397072be85SIrina Sokolova   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2407072be85SIrina Sokolova 
2417072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2427072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2437072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
2447072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
245017c2882SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2467072be85SIrina Sokolova   }
2477072be85SIrina Sokolova 
2487072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
249017c2882SIrina Sokolova   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
2507072be85SIrina Sokolova 
251*b9e7e5c1SBarry Smith   ierr = PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);CHKERRQ(ierr);
2527072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2537072be85SIrina Sokolova   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2547072be85SIrina Sokolova   PetscFunctionReturn(0);
2557072be85SIrina Sokolova }
2567072be85SIrina Sokolova 
257*b9e7e5c1SBarry Smith static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
2587072be85SIrina Sokolova {
2597072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
2607072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
2617072be85SIrina Sokolova   const PetscScalar *x;
2627072be85SIrina Sokolova   PetscScalar       *y;
2637072be85SIrina Sokolova   PetscErrorCode    ierr;
2647072be85SIrina Sokolova   sparse_status_t   stat;
2657072be85SIrina Sokolova 
2667072be85SIrina Sokolova   PetscFunctionBegin;
2677072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2687072be85SIrina Sokolova   if(!a->nz) {
269*b9e7e5c1SBarry Smith     ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2707072be85SIrina Sokolova     PetscFunctionReturn(0);
2717072be85SIrina Sokolova   }
2727072be85SIrina Sokolova 
2737072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2747072be85SIrina Sokolova   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2757072be85SIrina Sokolova 
2767072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2777072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2787072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
2797072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
280017c2882SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2817072be85SIrina Sokolova   }
2827072be85SIrina Sokolova 
2837072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
284017c2882SIrina Sokolova   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
2857072be85SIrina Sokolova 
286*b9e7e5c1SBarry Smith   ierr = PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);CHKERRQ(ierr);
2877072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2887072be85SIrina Sokolova   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2897072be85SIrina Sokolova   PetscFunctionReturn(0);
2907072be85SIrina Sokolova }
2917072be85SIrina Sokolova 
292*b9e7e5c1SBarry Smith static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
2937072be85SIrina Sokolova {
2947072be85SIrina Sokolova   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
2957072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
2967072be85SIrina Sokolova   const PetscScalar  *x;
2977072be85SIrina Sokolova   PetscScalar        *y,*z;
2987072be85SIrina Sokolova   PetscErrorCode     ierr;
2997072be85SIrina Sokolova   PetscInt           m=a->mbs*A->rmap->bs;
3007072be85SIrina Sokolova   PetscInt           i;
3017072be85SIrina Sokolova 
3027072be85SIrina Sokolova   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
3037072be85SIrina Sokolova 
3047072be85SIrina Sokolova   PetscFunctionBegin;
3057072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3067072be85SIrina Sokolova   if (!a->nz) {
307*b9e7e5c1SBarry Smith     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
3087072be85SIrina Sokolova     PetscFunctionReturn(0);
3097072be85SIrina Sokolova   }
3107072be85SIrina Sokolova 
3117072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3127072be85SIrina Sokolova   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
3137072be85SIrina Sokolova 
3147072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3157072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3167072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3177072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
318017c2882SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3197072be85SIrina Sokolova   }
3207072be85SIrina Sokolova 
3217072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3227072be85SIrina Sokolova   if (zz == yy) {
3237072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3247072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
325017c2882SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
3267072be85SIrina Sokolova   } else {
3277072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3287072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
329017c2882SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
3307072be85SIrina Sokolova     for (i=0; i<m; i++) {
3317072be85SIrina Sokolova       z[i] += y[i];
3327072be85SIrina Sokolova     }
3337072be85SIrina Sokolova   }
3347072be85SIrina Sokolova 
335*b9e7e5c1SBarry Smith   ierr = PetscLogFlops(2.0*a->bs2*a->nz);CHKERRQ(ierr);
3367072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3377072be85SIrina Sokolova   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
3387072be85SIrina Sokolova   PetscFunctionReturn(0);
3397072be85SIrina Sokolova }
3407072be85SIrina Sokolova 
341*b9e7e5c1SBarry Smith static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
3427072be85SIrina Sokolova {
3437072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
3447072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
3457072be85SIrina Sokolova   const PetscScalar *x;
3467072be85SIrina Sokolova   PetscScalar       *y,*z;
3477072be85SIrina Sokolova   PetscErrorCode    ierr;
3487072be85SIrina Sokolova   PetscInt          n=a->nbs*A->rmap->bs;
3497072be85SIrina Sokolova   PetscInt          i;
3507072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
3517072be85SIrina Sokolova   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
3527072be85SIrina Sokolova 
3537072be85SIrina Sokolova   PetscFunctionBegin;
3547072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3557072be85SIrina Sokolova   if(!a->nz) {
356*b9e7e5c1SBarry Smith     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
3577072be85SIrina Sokolova     PetscFunctionReturn(0);
3587072be85SIrina Sokolova   }
3597072be85SIrina Sokolova 
3607072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3617072be85SIrina Sokolova   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
3627072be85SIrina Sokolova 
3637072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3647072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3657072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3667072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
367017c2882SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3687072be85SIrina Sokolova   }
3697072be85SIrina Sokolova 
3707072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3717072be85SIrina Sokolova   if (zz == yy) {
3727072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3737072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
374017c2882SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
3757072be85SIrina Sokolova   } else {
3767072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3777072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
378017c2882SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
3797072be85SIrina Sokolova     for (i=0; i<n; i++) {
3807072be85SIrina Sokolova       z[i] += y[i];
3817072be85SIrina Sokolova     }
3827072be85SIrina Sokolova   }
3837072be85SIrina Sokolova 
384*b9e7e5c1SBarry Smith   ierr = PetscLogFlops(2.0*a->bs2*a->nz);CHKERRQ(ierr);
3857072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3867072be85SIrina Sokolova   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
3877072be85SIrina Sokolova   PetscFunctionReturn(0);
3887072be85SIrina Sokolova }
3897072be85SIrina Sokolova 
390*b9e7e5c1SBarry Smith static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
3917072be85SIrina Sokolova {
3927072be85SIrina Sokolova   PetscErrorCode ierr;
3937072be85SIrina Sokolova 
3947072be85SIrina Sokolova   PetscFunctionBegin;
3957072be85SIrina Sokolova   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
3967072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
3977072be85SIrina Sokolova   PetscFunctionReturn(0);
3987072be85SIrina Sokolova }
3997072be85SIrina Sokolova 
400*b9e7e5c1SBarry Smith static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
4017072be85SIrina Sokolova {
4027072be85SIrina Sokolova   PetscErrorCode ierr;
4037072be85SIrina Sokolova 
4047072be85SIrina Sokolova   PetscFunctionBegin;
4057072be85SIrina Sokolova   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
4067072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
4077072be85SIrina Sokolova   PetscFunctionReturn(0);
4087072be85SIrina Sokolova }
4097072be85SIrina Sokolova 
410*b9e7e5c1SBarry Smith static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
4117072be85SIrina Sokolova {
4127072be85SIrina Sokolova   PetscErrorCode ierr;
4137072be85SIrina Sokolova 
4147072be85SIrina Sokolova   PetscFunctionBegin;
4157072be85SIrina Sokolova   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
4167072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
4177072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
4187072be85SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
4197072be85SIrina Sokolova   }
4207072be85SIrina Sokolova   PetscFunctionReturn(0);
4217072be85SIrina Sokolova }
4227072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
4237072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
4247072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
4257072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
4267072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
4277072be85SIrina Sokolova {
4287072be85SIrina Sokolova   PetscErrorCode ierr;
4297072be85SIrina Sokolova   Mat            B = *newmat;
4307072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
4317072be85SIrina Sokolova   PetscBool      sametype;
4327072be85SIrina Sokolova 
4337072be85SIrina Sokolova   PetscFunctionBegin;
4347072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
4357072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
4367072be85SIrina Sokolova   }
4377072be85SIrina Sokolova 
4387072be85SIrina Sokolova   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
4397072be85SIrina Sokolova   if (sametype) PetscFunctionReturn(0);
4407072be85SIrina Sokolova 
4417072be85SIrina Sokolova   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
4427072be85SIrina Sokolova   B->spptr = (void*)baijmkl;
4437072be85SIrina Sokolova 
4447072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
4457072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
4467072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
4477072be85SIrina Sokolova 
4487072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
4497072be85SIrina Sokolova 
4507072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
4517072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
4527072be85SIrina Sokolova 
4537072be85SIrina Sokolova   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
4547072be85SIrina Sokolova   *newmat = B;
4557072be85SIrina Sokolova   PetscFunctionReturn(0);
4567072be85SIrina Sokolova }
4579c46acdfSRichard Tran Mills 
458*b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
4594d6dccb5SIrina Sokolova {
4604d6dccb5SIrina Sokolova   PetscErrorCode  ierr;
4619c46acdfSRichard Tran Mills 
4624d6dccb5SIrina Sokolova   PetscFunctionBegin;
4634d6dccb5SIrina Sokolova   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
4644d6dccb5SIrina Sokolova   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
4654d6dccb5SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
4664d6dccb5SIrina Sokolova   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
4674d6dccb5SIrina Sokolova   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
4684d6dccb5SIrina Sokolova   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
4694d6dccb5SIrina Sokolova   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
4704d6dccb5SIrina Sokolova   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
4714d6dccb5SIrina Sokolova   A->ops->scale            = MatScale_SeqBAIJMKL;
4724d6dccb5SIrina Sokolova   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
4734d6dccb5SIrina Sokolova   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
4744d6dccb5SIrina Sokolova   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
4754d6dccb5SIrina Sokolova   PetscFunctionReturn(0);
4764d6dccb5SIrina Sokolova }
4779c46acdfSRichard Tran Mills 
4787072be85SIrina Sokolova /*@C
4797072be85SIrina Sokolova    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
4807072be85SIrina Sokolova    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
4817072be85SIrina Sokolova    routines from Intel MKL whenever possible.
4827072be85SIrina Sokolova    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
4837072be85SIrina Sokolova    operations are currently supported.
4847072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
4857072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
4867072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
4877072be85SIrina Sokolova 
4887072be85SIrina Sokolova    Input Parameters:
4897072be85SIrina Sokolova +  comm - MPI communicator, set to PETSC_COMM_SELF
4907072be85SIrina Sokolova .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
4917072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
4927072be85SIrina Sokolova .  m - number of rows
4937072be85SIrina Sokolova .  n - number of columns
4947072be85SIrina Sokolova .  nz - number of nonzero blocks  per block row (same for all rows)
4957072be85SIrina Sokolova -  nnz - array containing the number of nonzero blocks in the various block rows
4967072be85SIrina Sokolova          (possibly different for each block row) or NULL
4977072be85SIrina Sokolova 
4987072be85SIrina Sokolova 
4997072be85SIrina Sokolova    Output Parameter:
5007072be85SIrina Sokolova .  A - the matrix
5017072be85SIrina Sokolova 
5027072be85SIrina Sokolova    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
503f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
5047072be85SIrina Sokolova    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
5057072be85SIrina Sokolova 
5067072be85SIrina Sokolova    Options Database Keys:
5077072be85SIrina Sokolova .   -mat_no_unroll - uses code that does not unroll the loops in the
5087072be85SIrina Sokolova                      block calculations (much slower)
5097072be85SIrina Sokolova .    -mat_block_size - size of the blocks to use
5107072be85SIrina Sokolova 
5117072be85SIrina Sokolova    Level: intermediate
5127072be85SIrina Sokolova 
5137072be85SIrina Sokolova    Notes:
5147072be85SIrina Sokolova    The number of rows and columns must be divisible by blocksize.
5157072be85SIrina Sokolova 
5167072be85SIrina Sokolova    If the nnz parameter is given then the nz parameter is ignored
5177072be85SIrina Sokolova 
5187072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
5197072be85SIrina Sokolova 
5207072be85SIrina Sokolova    The block AIJ format is fully compatible with standard Fortran 77
5217072be85SIrina Sokolova    storage.  That is, the stored row and column indices can begin at
5227072be85SIrina Sokolova    either one (as in Fortran) or zero.  See the users' manual for details.
5237072be85SIrina Sokolova 
5247072be85SIrina Sokolova    Specify the preallocated storage with either nz or nnz (not both).
5257072be85SIrina Sokolova    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
5267072be85SIrina Sokolova    allocation.  See Users-Manual: ch_mat for details.
5277072be85SIrina Sokolova    matrices.
5287072be85SIrina Sokolova 
5297072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
5307072be85SIrina Sokolova 
5317072be85SIrina Sokolova @*/
5327072be85SIrina Sokolova PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
5337072be85SIrina Sokolova {
5347072be85SIrina Sokolova   PetscErrorCode ierr;
5357072be85SIrina Sokolova 
5367072be85SIrina Sokolova   PetscFunctionBegin;
5377072be85SIrina Sokolova   ierr = MatCreate(comm,A);CHKERRQ(ierr);
5387072be85SIrina Sokolova   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
5397072be85SIrina Sokolova   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
5407072be85SIrina Sokolova   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
5417072be85SIrina Sokolova   PetscFunctionReturn(0);
5427072be85SIrina Sokolova }
5439c46acdfSRichard Tran Mills 
5447072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
5457072be85SIrina Sokolova {
5467072be85SIrina Sokolova   PetscErrorCode ierr;
5477072be85SIrina Sokolova 
5487072be85SIrina Sokolova   PetscFunctionBegin;
5497072be85SIrina Sokolova   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
5507072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
5517072be85SIrina Sokolova   PetscFunctionReturn(0);
5527072be85SIrina Sokolova }
553