1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL version is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 11 #define MKL_ILP64 12 #endif 13 #include <mkl_spblas.h> 14 15 static PetscBool PetscSeqBAIJSupportsZeroBased(void) 16 { 17 static PetscBool set = PETSC_FALSE, value; 18 int n = 1, ia[1], ja[1]; 19 float a[1]; 20 sparse_status_t status; 21 sparse_matrix_t A; 22 23 if (!set) { 24 status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a); 25 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 26 (void)mkl_sparse_destroy(A); 27 set = PETSC_TRUE; 28 } 29 return value; 30 } 31 32 typedef struct { 33 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 34 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 35 struct matrix_descr descr; 36 PetscInt *ai1; 37 PetscInt *aj1; 38 } Mat_SeqBAIJMKL; 39 40 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 41 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType); 42 43 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 44 { 45 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 46 /* so we will ignore 'MatType type'. */ 47 Mat B = *newmat; 48 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 49 50 PetscFunctionBegin; 51 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 52 53 /* Reset the original function pointers. */ 54 B->ops->duplicate = MatDuplicate_SeqBAIJ; 55 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 56 B->ops->destroy = MatDestroy_SeqBAIJ; 57 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 58 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 59 B->ops->scale = MatScale_SeqBAIJ; 60 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 61 B->ops->axpy = MatAXPY_SeqBAIJ; 62 63 switch (A->rmap->bs) { 64 case 1: 65 B->ops->mult = MatMult_SeqBAIJ_1; 66 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 67 break; 68 case 2: 69 B->ops->mult = MatMult_SeqBAIJ_2; 70 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 71 break; 72 case 3: 73 B->ops->mult = MatMult_SeqBAIJ_3; 74 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 75 break; 76 case 4: 77 B->ops->mult = MatMult_SeqBAIJ_4; 78 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 79 break; 80 case 5: 81 B->ops->mult = MatMult_SeqBAIJ_5; 82 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 83 break; 84 case 6: 85 B->ops->mult = MatMult_SeqBAIJ_6; 86 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 87 break; 88 case 7: 89 B->ops->mult = MatMult_SeqBAIJ_7; 90 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 91 break; 92 case 11: 93 B->ops->mult = MatMult_SeqBAIJ_11; 94 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 95 break; 96 case 15: 97 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 98 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 99 break; 100 default: 101 B->ops->mult = MatMult_SeqBAIJ_N; 102 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 103 break; 104 } 105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL)); 106 107 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 108 * simply involves destroying the MKL sparse matrix handle and then freeing 109 * the spptr pointer. */ 110 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; 111 112 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 113 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 114 PetscCall(PetscFree(B->spptr)); 115 116 /* Change the type of B to MATSEQBAIJ. */ 117 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 118 119 *newmat = B; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 124 { 125 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 126 127 PetscFunctionBegin; 128 if (baijmkl) { 129 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 130 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 131 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 132 PetscCall(PetscFree(A->spptr)); 133 } 134 135 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 136 * to destroy everything that remains. */ 137 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 138 PetscCall(MatDestroy_SeqBAIJ(A)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 143 { 144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 145 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 146 PetscInt mbs, nbs, nz, bs; 147 MatScalar *aa; 148 PetscInt *aj, *ai; 149 PetscInt i; 150 151 PetscFunctionBegin; 152 if (baijmkl->sparse_optimized) { 153 /* Matrix has been previously assembled and optimized. Must destroy old 154 * matrix handle before running the optimization step again. */ 155 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 156 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 157 } 158 baijmkl->sparse_optimized = PETSC_FALSE; 159 160 /* Now perform the SpMV2 setup and matrix optimization. */ 161 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 164 mbs = a->mbs; 165 nbs = a->nbs; 166 nz = a->nz; 167 bs = A->rmap->bs; 168 aa = a->a; 169 170 if ((nz != 0) & !A->structure_only) { 171 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 172 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 173 if (PetscSeqBAIJSupportsZeroBased()) { 174 aj = a->j; 175 ai = a->i; 176 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 177 } else { 178 PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj)); 179 for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1; 180 for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1; 181 aa = a->a; 182 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 183 baijmkl->ai1 = ai; 184 baijmkl->aj1 = aj; 185 } 186 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000)); 187 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE)); 188 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 189 baijmkl->sparse_optimized = PETSC_TRUE; 190 } 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 195 { 196 Mat_SeqBAIJMKL *baijmkl; 197 Mat_SeqBAIJMKL *baijmkl_dest; 198 199 PetscFunctionBegin; 200 PetscCall(MatDuplicate_SeqBAIJ(A, op, M)); 201 baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 202 PetscCall(PetscNew(&baijmkl_dest)); 203 (*M)->spptr = (void *)baijmkl_dest; 204 PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL))); 205 baijmkl_dest->sparse_optimized = PETSC_FALSE; 206 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 207 PetscFunctionReturn(PETSC_SUCCESS); 208 } 209 210 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 211 { 212 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 213 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 214 const PetscScalar *x; 215 PetscScalar *y; 216 217 PetscFunctionBegin; 218 /* If there are no nonzero entries, zero yy and return immediately. */ 219 if (!a->nz) { 220 PetscCall(VecSet(yy, 0.0)); 221 PetscFunctionReturn(PETSC_SUCCESS); 222 } 223 224 PetscCall(VecGetArrayRead(xx, &x)); 225 PetscCall(VecGetArray(yy, &y)); 226 227 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 228 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 229 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 230 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 231 232 /* Call MKL SpMV2 executor routine to do the MatMult. */ 233 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 234 235 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 236 PetscCall(VecRestoreArrayRead(xx, &x)); 237 PetscCall(VecRestoreArray(yy, &y)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 244 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 245 const PetscScalar *x; 246 PetscScalar *y; 247 248 PetscFunctionBegin; 249 /* If there are no nonzero entries, zero yy and return immediately. */ 250 if (!a->nz) { 251 PetscCall(VecSet(yy, 0.0)); 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 PetscCall(VecGetArrayRead(xx, &x)); 256 PetscCall(VecGetArray(yy, &y)); 257 258 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 259 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 260 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 261 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 262 263 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 264 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 265 266 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 267 PetscCall(VecRestoreArrayRead(xx, &x)); 268 PetscCall(VecRestoreArray(yy, &y)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 275 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 276 const PetscScalar *x; 277 PetscScalar *y, *z; 278 PetscInt m = a->mbs * A->rmap->bs; 279 PetscInt i; 280 281 PetscFunctionBegin; 282 /* If there are no nonzero entries, set zz = yy and return immediately. */ 283 if (!a->nz) { 284 PetscCall(VecCopy(yy, zz)); 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 PetscCall(VecGetArrayRead(xx, &x)); 289 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 290 291 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 292 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 293 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 294 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 295 296 /* Call MKL sparse BLAS routine to do the MatMult. */ 297 if (zz == yy) { 298 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 299 * with alpha and beta both set to 1.0. */ 300 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 301 } else { 302 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 303 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 304 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 305 for (i = 0; i < m; i++) z[i] += y[i]; 306 } 307 308 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 309 PetscCall(VecRestoreArrayRead(xx, &x)); 310 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 311 PetscFunctionReturn(PETSC_SUCCESS); 312 } 313 314 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 315 { 316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 317 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 318 const PetscScalar *x; 319 PetscScalar *y, *z; 320 PetscInt n = a->nbs * A->rmap->bs; 321 PetscInt i; 322 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 323 324 PetscFunctionBegin; 325 /* If there are no nonzero entries, set zz = yy and return immediately. */ 326 if (!a->nz) { 327 PetscCall(VecCopy(yy, zz)); 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscCall(VecGetArrayRead(xx, &x)); 332 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 333 334 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 335 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 336 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 337 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 338 339 /* Call MKL sparse BLAS routine to do the MatMult. */ 340 if (zz == yy) { 341 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 342 * with alpha and beta both set to 1.0. */ 343 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 344 } else { 345 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 346 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 347 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 348 for (i = 0; i < n; i++) z[i] += y[i]; 349 } 350 351 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 352 PetscCall(VecRestoreArrayRead(xx, &x)); 353 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) 358 { 359 PetscFunctionBegin; 360 PetscCall(MatScale_SeqBAIJ(inA, alpha)); 361 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 362 PetscFunctionReturn(PETSC_SUCCESS); 363 } 364 365 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) 366 { 367 PetscFunctionBegin; 368 PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr)); 369 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 370 PetscFunctionReturn(PETSC_SUCCESS); 371 } 372 373 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) 374 { 375 PetscFunctionBegin; 376 PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str)); 377 if (str == SAME_NONZERO_PATTERN) { 378 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 379 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 380 } 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 384 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 385 * routine, but can also be used to convert an assembled SeqBAIJ matrix 386 * into a SeqBAIJMKL one. */ 387 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 388 { 389 Mat B = *newmat; 390 Mat_SeqBAIJMKL *baijmkl; 391 PetscBool sametype; 392 393 PetscFunctionBegin; 394 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 395 396 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 397 if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 398 399 PetscCall(PetscNew(&baijmkl)); 400 B->spptr = (void *)baijmkl; 401 402 /* Set function pointers for methods that we inherit from BAIJ but override. 403 * We also parse some command line options below, since those determine some of the methods we point to. */ 404 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 405 406 baijmkl->sparse_optimized = PETSC_FALSE; 407 408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL)); 409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ)); 410 411 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL)); 412 *newmat = B; 413 PetscFunctionReturn(PETSC_SUCCESS); 414 } 415 416 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 417 { 418 PetscFunctionBegin; 419 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 420 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 421 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 422 A->ops->destroy = MatDestroy_SeqBAIJMKL; 423 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 424 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 425 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 426 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 427 A->ops->scale = MatScale_SeqBAIJMKL; 428 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 429 A->ops->axpy = MatAXPY_SeqBAIJMKL; 430 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 /*@C 435 MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`. 436 This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS 437 routines from Intel MKL whenever possible. 438 439 Input Parameters: 440 + comm - MPI communicator, set to `PETSC_COMM_SELF` 441 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 442 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 443 . m - number of rows 444 . n - number of columns 445 . nz - number of nonzero blocks per block row (same for all rows) 446 - nnz - array containing the number of nonzero blocks in the various block rows 447 (possibly different for each block row) or `NULL` 448 449 Output Parameter: 450 . A - the matrix 451 452 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 453 MatXXXXSetPreallocation() paradigm instead of this routine directly. 454 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 455 456 Options Database Keys: 457 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 458 - -mat_block_size - size of the blocks to use 459 460 Level: intermediate 461 462 Notes: 463 The number of rows and columns must be divisible by blocksize. 464 465 If the `nnz` parameter is given then the `nz` parameter is ignored 466 467 A nonzero block is any block that as 1 or more nonzeros in it 468 469 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 470 operations are currently supported. 471 If the installed version of MKL supports the "SpMV2" sparse 472 inspector-executor routines, then those are used by default. 473 Default PETSc kernels are used otherwise. 474 475 The `MATSEQBAIJ` format is fully compatible with standard Fortran 476 storage. That is, the stored row and column indices can begin at 477 either one (as in Fortran) or zero. See the users' manual for details. 478 479 Specify the preallocated storage with either nz or nnz (not both). 480 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 481 allocation. See [Sparse Matrices](sec_matsparse) for details. 482 matrices. 483 484 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 485 @*/ 486 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 487 { 488 PetscFunctionBegin; 489 PetscCall(MatCreate(comm, A)); 490 PetscCall(MatSetSizes(*A, m, n, m, n)); 491 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 492 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz)); 493 PetscFunctionReturn(PETSC_SUCCESS); 494 } 495 496 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 497 { 498 PetscFunctionBegin; 499 PetscCall(MatSetType(A, MATSEQBAIJ)); 500 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A)); 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503