1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 #include <mkl_spblas.h> 11 12 static PetscBool PetscSeqBAIJSupportsZeroBased(void) { 13 static PetscBool set = PETSC_FALSE, value; 14 int n = 1, ia[1], ja[1]; 15 float a[1]; 16 sparse_status_t status; 17 sparse_matrix_t A; 18 19 if (!set) { 20 status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a); 21 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 22 (void)mkl_sparse_destroy(A); 23 set = PETSC_TRUE; 24 } 25 return value; 26 } 27 28 typedef struct { 29 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 30 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 31 struct matrix_descr descr; 32 PetscInt *ai1; 33 PetscInt *aj1; 34 } Mat_SeqBAIJMKL; 35 36 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 37 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType); 38 39 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 40 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 41 /* so we will ignore 'MatType type'. */ 42 Mat B = *newmat; 43 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 44 45 PetscFunctionBegin; 46 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 47 48 /* Reset the original function pointers. */ 49 B->ops->duplicate = MatDuplicate_SeqBAIJ; 50 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 51 B->ops->destroy = MatDestroy_SeqBAIJ; 52 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 53 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 54 B->ops->scale = MatScale_SeqBAIJ; 55 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 56 B->ops->axpy = MatAXPY_SeqBAIJ; 57 58 switch (A->rmap->bs) { 59 case 1: 60 B->ops->mult = MatMult_SeqBAIJ_1; 61 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 62 break; 63 case 2: 64 B->ops->mult = MatMult_SeqBAIJ_2; 65 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 66 break; 67 case 3: 68 B->ops->mult = MatMult_SeqBAIJ_3; 69 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 70 break; 71 case 4: 72 B->ops->mult = MatMult_SeqBAIJ_4; 73 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 74 break; 75 case 5: 76 B->ops->mult = MatMult_SeqBAIJ_5; 77 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 78 break; 79 case 6: 80 B->ops->mult = MatMult_SeqBAIJ_6; 81 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 82 break; 83 case 7: 84 B->ops->mult = MatMult_SeqBAIJ_7; 85 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 86 break; 87 case 11: 88 B->ops->mult = MatMult_SeqBAIJ_11; 89 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 90 break; 91 case 15: 92 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 93 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 94 break; 95 default: 96 B->ops->mult = MatMult_SeqBAIJ_N; 97 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 98 break; 99 } 100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL)); 101 102 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 103 * simply involves destroying the MKL sparse matrix handle and then freeing 104 * the spptr pointer. */ 105 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; 106 107 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 108 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 109 PetscCall(PetscFree(B->spptr)); 110 111 /* Change the type of B to MATSEQBAIJ. */ 112 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 113 114 *newmat = B; 115 PetscFunctionReturn(0); 116 } 117 118 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) { 119 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 120 121 PetscFunctionBegin; 122 if (baijmkl) { 123 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 124 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 125 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 126 PetscCall(PetscFree(A->spptr)); 127 } 128 129 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 130 * to destroy everything that remains. */ 131 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 132 PetscCall(MatDestroy_SeqBAIJ(A)); 133 PetscFunctionReturn(0); 134 } 135 136 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 138 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 139 PetscInt mbs, nbs, nz, bs; 140 MatScalar *aa; 141 PetscInt *aj, *ai; 142 PetscInt i; 143 144 PetscFunctionBegin; 145 if (baijmkl->sparse_optimized) { 146 /* Matrix has been previously assembled and optimized. Must destroy old 147 * matrix handle before running the optimization step again. */ 148 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 149 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 150 } 151 baijmkl->sparse_optimized = PETSC_FALSE; 152 153 /* Now perform the SpMV2 setup and matrix optimization. */ 154 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 155 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 156 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 157 mbs = a->mbs; 158 nbs = a->nbs; 159 nz = a->nz; 160 bs = A->rmap->bs; 161 aa = a->a; 162 163 if ((nz != 0) & !(A->structure_only)) { 164 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 165 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 166 if (PetscSeqBAIJSupportsZeroBased()) { 167 aj = a->j; 168 ai = a->i; 169 PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa)); 170 } else { 171 PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj)); 172 for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1; 173 for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1; 174 aa = a->a; 175 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa)); 176 baijmkl->ai1 = ai; 177 baijmkl->aj1 = aj; 178 } 179 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000)); 180 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE)); 181 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 182 baijmkl->sparse_optimized = PETSC_TRUE; 183 } 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) { 188 Mat_SeqBAIJMKL *baijmkl; 189 Mat_SeqBAIJMKL *baijmkl_dest; 190 191 PetscFunctionBegin; 192 PetscCall(MatDuplicate_SeqBAIJ(A, op, M)); 193 baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 194 PetscCall(PetscNewLog((*M), &baijmkl_dest)); 195 (*M)->spptr = (void *)baijmkl_dest; 196 PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL))); 197 baijmkl_dest->sparse_optimized = PETSC_FALSE; 198 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 204 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 205 const PetscScalar *x; 206 PetscScalar *y; 207 208 PetscFunctionBegin; 209 /* If there are no nonzero entries, zero yy and return immediately. */ 210 if (!a->nz) { 211 PetscCall(VecSet(yy, 0.0)); 212 PetscFunctionReturn(0); 213 } 214 215 PetscCall(VecGetArrayRead(xx, &x)); 216 PetscCall(VecGetArray(yy, &y)); 217 218 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 219 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 220 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 221 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 222 223 /* Call MKL SpMV2 executor routine to do the MatMult. */ 224 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 225 226 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 227 PetscCall(VecRestoreArrayRead(xx, &x)); 228 PetscCall(VecRestoreArray(yy, &y)); 229 PetscFunctionReturn(0); 230 } 231 232 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 234 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 235 const PetscScalar *x; 236 PetscScalar *y; 237 238 PetscFunctionBegin; 239 /* If there are no nonzero entries, zero yy and return immediately. */ 240 if (!a->nz) { 241 PetscCall(VecSet(yy, 0.0)); 242 PetscFunctionReturn(0); 243 } 244 245 PetscCall(VecGetArrayRead(xx, &x)); 246 PetscCall(VecGetArray(yy, &y)); 247 248 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 249 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 250 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 251 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 252 253 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 254 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 255 256 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 257 PetscCall(VecRestoreArrayRead(xx, &x)); 258 PetscCall(VecRestoreArray(yy, &y)); 259 PetscFunctionReturn(0); 260 } 261 262 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 264 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 265 const PetscScalar *x; 266 PetscScalar *y, *z; 267 PetscInt m = a->mbs * A->rmap->bs; 268 PetscInt i; 269 270 PetscFunctionBegin; 271 /* If there are no nonzero entries, set zz = yy and return immediately. */ 272 if (!a->nz) { 273 PetscCall(VecCopy(yy, zz)); 274 PetscFunctionReturn(0); 275 } 276 277 PetscCall(VecGetArrayRead(xx, &x)); 278 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 279 280 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 281 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 282 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 283 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 284 285 /* Call MKL sparse BLAS routine to do the MatMult. */ 286 if (zz == yy) { 287 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 288 * with alpha and beta both set to 1.0. */ 289 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 290 } else { 291 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 292 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 293 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 294 for (i = 0; i < m; i++) z[i] += y[i]; 295 } 296 297 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 298 PetscCall(VecRestoreArrayRead(xx, &x)); 299 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 300 PetscFunctionReturn(0); 301 } 302 303 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 305 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 306 const PetscScalar *x; 307 PetscScalar *y, *z; 308 PetscInt n = a->nbs * A->rmap->bs; 309 PetscInt i; 310 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 311 312 PetscFunctionBegin; 313 /* If there are no nonzero entries, set zz = yy and return immediately. */ 314 if (!a->nz) { 315 PetscCall(VecCopy(yy, zz)); 316 PetscFunctionReturn(0); 317 } 318 319 PetscCall(VecGetArrayRead(xx, &x)); 320 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 321 322 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 323 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 324 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 325 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 326 327 /* Call MKL sparse BLAS routine to do the MatMult. */ 328 if (zz == yy) { 329 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 330 * with alpha and beta both set to 1.0. */ 331 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 332 } else { 333 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 334 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 335 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 336 for (i = 0; i < n; i++) z[i] += y[i]; 337 } 338 339 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 340 PetscCall(VecRestoreArrayRead(xx, &x)); 341 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 342 PetscFunctionReturn(0); 343 } 344 345 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) { 346 PetscFunctionBegin; 347 PetscCall(MatScale_SeqBAIJ(inA, alpha)); 348 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 349 PetscFunctionReturn(0); 350 } 351 352 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) { 353 PetscFunctionBegin; 354 PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr)); 355 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 356 PetscFunctionReturn(0); 357 } 358 359 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) { 360 PetscFunctionBegin; 361 PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str)); 362 if (str == SAME_NONZERO_PATTERN) { 363 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 364 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 365 } 366 PetscFunctionReturn(0); 367 } 368 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 369 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 370 * routine, but can also be used to convert an assembled SeqBAIJ matrix 371 * into a SeqBAIJMKL one. */ 372 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 373 Mat B = *newmat; 374 Mat_SeqBAIJMKL *baijmkl; 375 PetscBool sametype; 376 377 PetscFunctionBegin; 378 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 379 380 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 381 if (sametype) PetscFunctionReturn(0); 382 383 PetscCall(PetscNewLog(B, &baijmkl)); 384 B->spptr = (void *)baijmkl; 385 386 /* Set function pointers for methods that we inherit from BAIJ but override. 387 * We also parse some command line options below, since those determine some of the methods we point to. */ 388 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 389 390 baijmkl->sparse_optimized = PETSC_FALSE; 391 392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL)); 393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ)); 394 395 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL)); 396 *newmat = B; 397 PetscFunctionReturn(0); 398 } 399 400 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) { 401 PetscFunctionBegin; 402 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 403 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 404 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 405 A->ops->destroy = MatDestroy_SeqBAIJMKL; 406 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 407 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 408 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 409 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 410 A->ops->scale = MatScale_SeqBAIJMKL; 411 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 412 A->ops->axpy = MatAXPY_SeqBAIJMKL; 413 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 414 PetscFunctionReturn(0); 415 } 416 417 /*@C 418 MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`. 419 This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS 420 routines from Intel MKL whenever possible. 421 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 422 operations are currently supported. 423 If the installed version of MKL supports the "SpMV2" sparse 424 inspector-executor routines, then those are used by default. 425 Default PETSc kernels are used otherwise. 426 427 Input Parameters: 428 + comm - MPI communicator, set to `PETSC_COMM_SELF` 429 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 430 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 431 . m - number of rows 432 . n - number of columns 433 . nz - number of nonzero blocks per block row (same for all rows) 434 - nnz - array containing the number of nonzero blocks in the various block rows 435 (possibly different for each block row) or NULL 436 437 Output Parameter: 438 . A - the matrix 439 440 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 441 MatXXXXSetPreallocation() paradigm instead of this routine directly. 442 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 443 444 Options Database Keys: 445 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 446 - -mat_block_size - size of the blocks to use 447 448 Level: intermediate 449 450 Notes: 451 The number of rows and columns must be divisible by blocksize. 452 453 If the nnz parameter is given then the nz parameter is ignored 454 455 A nonzero block is any block that as 1 or more nonzeros in it 456 457 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 458 storage. That is, the stored row and column indices can begin at 459 either one (as in Fortran) or zero. See the users' manual for details. 460 461 Specify the preallocated storage with either nz or nnz (not both). 462 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 463 allocation. See Users-Manual: ch_mat for details. 464 matrices. 465 466 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 467 @*/ 468 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 469 PetscFunctionBegin; 470 PetscCall(MatCreate(comm, A)); 471 PetscCall(MatSetSizes(*A, m, n, m, n)); 472 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 473 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz)); 474 PetscFunctionReturn(0); 475 } 476 477 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) { 478 PetscFunctionBegin; 479 PetscCall(MatSetType(A, MATSEQBAIJ)); 480 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A)); 481 PetscFunctionReturn(0); 482 } 483