1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 #include <mkl_spblas.h> 11 12 static PetscBool PetscSeqBAIJSupportsZeroBased(void) 13 { 14 static PetscBool set = PETSC_FALSE, value; 15 int n = 1, ia[1], ja[1]; 16 float a[1]; 17 sparse_status_t status; 18 sparse_matrix_t A; 19 20 if (!set) { 21 status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a); 22 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 23 (void)mkl_sparse_destroy(A); 24 set = PETSC_TRUE; 25 } 26 return value; 27 } 28 29 typedef struct { 30 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 31 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 32 struct matrix_descr descr; 33 PetscInt *ai1; 34 PetscInt *aj1; 35 } Mat_SeqBAIJMKL; 36 37 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 38 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType); 39 40 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 41 { 42 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 43 /* so we will ignore 'MatType type'. */ 44 Mat B = *newmat; 45 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 46 47 PetscFunctionBegin; 48 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 49 50 /* Reset the original function pointers. */ 51 B->ops->duplicate = MatDuplicate_SeqBAIJ; 52 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 53 B->ops->destroy = MatDestroy_SeqBAIJ; 54 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 55 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 56 B->ops->scale = MatScale_SeqBAIJ; 57 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 58 B->ops->axpy = MatAXPY_SeqBAIJ; 59 60 switch (A->rmap->bs) { 61 case 1: 62 B->ops->mult = MatMult_SeqBAIJ_1; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 64 break; 65 case 2: 66 B->ops->mult = MatMult_SeqBAIJ_2; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 68 break; 69 case 3: 70 B->ops->mult = MatMult_SeqBAIJ_3; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 72 break; 73 case 4: 74 B->ops->mult = MatMult_SeqBAIJ_4; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 76 break; 77 case 5: 78 B->ops->mult = MatMult_SeqBAIJ_5; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 80 break; 81 case 6: 82 B->ops->mult = MatMult_SeqBAIJ_6; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 84 break; 85 case 7: 86 B->ops->mult = MatMult_SeqBAIJ_7; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 88 break; 89 case 11: 90 B->ops->mult = MatMult_SeqBAIJ_11; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 92 break; 93 case 15: 94 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 95 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 96 break; 97 default: 98 B->ops->mult = MatMult_SeqBAIJ_N; 99 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 100 break; 101 } 102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL)); 103 104 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 105 * simply involves destroying the MKL sparse matrix handle and then freeing 106 * the spptr pointer. */ 107 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; 108 109 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 110 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 111 PetscCall(PetscFree(B->spptr)); 112 113 /* Change the type of B to MATSEQBAIJ. */ 114 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 115 116 *newmat = B; 117 PetscFunctionReturn(0); 118 } 119 120 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 121 { 122 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 123 124 PetscFunctionBegin; 125 if (baijmkl) { 126 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 127 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 128 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 129 PetscCall(PetscFree(A->spptr)); 130 } 131 132 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 133 * to destroy everything that remains. */ 134 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 135 PetscCall(MatDestroy_SeqBAIJ(A)); 136 PetscFunctionReturn(0); 137 } 138 139 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 142 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 143 PetscInt mbs, nbs, nz, bs; 144 MatScalar *aa; 145 PetscInt *aj, *ai; 146 PetscInt i; 147 148 PetscFunctionBegin; 149 if (baijmkl->sparse_optimized) { 150 /* Matrix has been previously assembled and optimized. Must destroy old 151 * matrix handle before running the optimization step again. */ 152 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 153 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 154 } 155 baijmkl->sparse_optimized = PETSC_FALSE; 156 157 /* Now perform the SpMV2 setup and matrix optimization. */ 158 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 159 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 160 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 161 mbs = a->mbs; 162 nbs = a->nbs; 163 nz = a->nz; 164 bs = A->rmap->bs; 165 aa = a->a; 166 167 if ((nz != 0) & !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 if (PetscSeqBAIJSupportsZeroBased()) { 171 aj = a->j; 172 ai = a->i; 173 PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa)); 174 } else { 175 PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj)); 176 for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1; 177 for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1; 178 aa = a->a; 179 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa)); 180 baijmkl->ai1 = ai; 181 baijmkl->aj1 = aj; 182 } 183 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000)); 184 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE)); 185 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 186 baijmkl->sparse_optimized = PETSC_TRUE; 187 } 188 PetscFunctionReturn(0); 189 } 190 191 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 192 { 193 Mat_SeqBAIJMKL *baijmkl; 194 Mat_SeqBAIJMKL *baijmkl_dest; 195 196 PetscFunctionBegin; 197 PetscCall(MatDuplicate_SeqBAIJ(A, op, M)); 198 baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 199 PetscCall(PetscNew(&baijmkl_dest)); 200 (*M)->spptr = (void *)baijmkl_dest; 201 PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL))); 202 baijmkl_dest->sparse_optimized = PETSC_FALSE; 203 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 208 { 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 210 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 211 const PetscScalar *x; 212 PetscScalar *y; 213 214 PetscFunctionBegin; 215 /* If there are no nonzero entries, zero yy and return immediately. */ 216 if (!a->nz) { 217 PetscCall(VecSet(yy, 0.0)); 218 PetscFunctionReturn(0); 219 } 220 221 PetscCall(VecGetArrayRead(xx, &x)); 222 PetscCall(VecGetArray(yy, &y)); 223 224 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 225 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 226 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 227 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 228 229 /* Call MKL SpMV2 executor routine to do the MatMult. */ 230 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 231 232 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 233 PetscCall(VecRestoreArrayRead(xx, &x)); 234 PetscCall(VecRestoreArray(yy, &y)); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 239 { 240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 241 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 242 const PetscScalar *x; 243 PetscScalar *y; 244 245 PetscFunctionBegin; 246 /* If there are no nonzero entries, zero yy and return immediately. */ 247 if (!a->nz) { 248 PetscCall(VecSet(yy, 0.0)); 249 PetscFunctionReturn(0); 250 } 251 252 PetscCall(VecGetArrayRead(xx, &x)); 253 PetscCall(VecGetArray(yy, &y)); 254 255 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 256 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 257 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 258 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 259 260 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 261 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 262 263 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 264 PetscCall(VecRestoreArrayRead(xx, &x)); 265 PetscCall(VecRestoreArray(yy, &y)); 266 PetscFunctionReturn(0); 267 } 268 269 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 270 { 271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 272 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 273 const PetscScalar *x; 274 PetscScalar *y, *z; 275 PetscInt m = a->mbs * A->rmap->bs; 276 PetscInt i; 277 278 PetscFunctionBegin; 279 /* If there are no nonzero entries, set zz = yy and return immediately. */ 280 if (!a->nz) { 281 PetscCall(VecCopy(yy, zz)); 282 PetscFunctionReturn(0); 283 } 284 285 PetscCall(VecGetArrayRead(xx, &x)); 286 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 287 288 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 289 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 290 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 291 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 292 293 /* Call MKL sparse BLAS routine to do the MatMult. */ 294 if (zz == yy) { 295 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 296 * with alpha and beta both set to 1.0. */ 297 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 298 } else { 299 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 300 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 301 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 302 for (i = 0; i < m; i++) z[i] += y[i]; 303 } 304 305 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 306 PetscCall(VecRestoreArrayRead(xx, &x)); 307 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 314 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 315 const PetscScalar *x; 316 PetscScalar *y, *z; 317 PetscInt n = a->nbs * A->rmap->bs; 318 PetscInt i; 319 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 320 321 PetscFunctionBegin; 322 /* If there are no nonzero entries, set zz = yy and return immediately. */ 323 if (!a->nz) { 324 PetscCall(VecCopy(yy, zz)); 325 PetscFunctionReturn(0); 326 } 327 328 PetscCall(VecGetArrayRead(xx, &x)); 329 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 330 331 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 332 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 333 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 334 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 335 336 /* Call MKL sparse BLAS routine to do the MatMult. */ 337 if (zz == yy) { 338 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 339 * with alpha and beta both set to 1.0. */ 340 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 341 } else { 342 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 343 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 344 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 345 for (i = 0; i < n; i++) z[i] += y[i]; 346 } 347 348 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 349 PetscCall(VecRestoreArrayRead(xx, &x)); 350 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 351 PetscFunctionReturn(0); 352 } 353 354 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) 355 { 356 PetscFunctionBegin; 357 PetscCall(MatScale_SeqBAIJ(inA, alpha)); 358 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 359 PetscFunctionReturn(0); 360 } 361 362 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) 363 { 364 PetscFunctionBegin; 365 PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr)); 366 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 367 PetscFunctionReturn(0); 368 } 369 370 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) 371 { 372 PetscFunctionBegin; 373 PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str)); 374 if (str == SAME_NONZERO_PATTERN) { 375 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 376 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 377 } 378 PetscFunctionReturn(0); 379 } 380 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 381 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 382 * routine, but can also be used to convert an assembled SeqBAIJ matrix 383 * into a SeqBAIJMKL one. */ 384 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 385 { 386 Mat B = *newmat; 387 Mat_SeqBAIJMKL *baijmkl; 388 PetscBool sametype; 389 390 PetscFunctionBegin; 391 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 392 393 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 394 if (sametype) PetscFunctionReturn(0); 395 396 PetscCall(PetscNew(&baijmkl)); 397 B->spptr = (void *)baijmkl; 398 399 /* Set function pointers for methods that we inherit from BAIJ but override. 400 * We also parse some command line options below, since those determine some of the methods we point to. */ 401 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 402 403 baijmkl->sparse_optimized = PETSC_FALSE; 404 405 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL)); 406 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ)); 407 408 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL)); 409 *newmat = B; 410 PetscFunctionReturn(0); 411 } 412 413 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 414 { 415 PetscFunctionBegin; 416 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 417 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 418 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 419 A->ops->destroy = MatDestroy_SeqBAIJMKL; 420 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 421 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 422 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 423 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 424 A->ops->scale = MatScale_SeqBAIJMKL; 425 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 426 A->ops->axpy = MatAXPY_SeqBAIJMKL; 427 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 428 PetscFunctionReturn(0); 429 } 430 431 /*@C 432 MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`. 433 This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS 434 routines from Intel MKL whenever possible. 435 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 436 operations are currently supported. 437 If the installed version of MKL supports the "SpMV2" sparse 438 inspector-executor routines, then those are used by default. 439 Default PETSc kernels are used otherwise. 440 441 Input Parameters: 442 + comm - MPI communicator, set to `PETSC_COMM_SELF` 443 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 444 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 445 . m - number of rows 446 . n - number of columns 447 . nz - number of nonzero blocks per block row (same for all rows) 448 - nnz - array containing the number of nonzero blocks in the various block rows 449 (possibly different for each block row) or NULL 450 451 Output Parameter: 452 . A - the matrix 453 454 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 455 MatXXXXSetPreallocation() paradigm instead of this routine directly. 456 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 457 458 Options Database Keys: 459 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 460 - -mat_block_size - size of the blocks to use 461 462 Level: intermediate 463 464 Notes: 465 The number of rows and columns must be divisible by blocksize. 466 467 If the nnz parameter is given then the nz parameter is ignored 468 469 A nonzero block is any block that as 1 or more nonzeros in it 470 471 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 472 storage. That is, the stored row and column indices can begin at 473 either one (as in Fortran) or zero. See the users' manual for details. 474 475 Specify the preallocated storage with either nz or nnz (not both). 476 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 477 allocation. See [Sparse Matrices](sec_matsparse) for details. 478 matrices. 479 480 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 481 @*/ 482 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 483 { 484 PetscFunctionBegin; 485 PetscCall(MatCreate(comm, A)); 486 PetscCall(MatSetSizes(*A, m, n, m, n)); 487 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 488 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz)); 489 PetscFunctionReturn(0); 490 } 491 492 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 493 { 494 PetscFunctionBegin; 495 PetscCall(MatSetType(A, MATSEQBAIJ)); 496 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A)); 497 PetscFunctionReturn(0); 498 } 499