1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 #include <mkl_spblas.h> 11 12 static PetscBool PetscSeqBAIJSupportsZeroBased(void) 13 { 14 static PetscBool set = PETSC_FALSE,value; 15 int n=1,ia[1],ja[1]; 16 float a[1]; 17 sparse_status_t status; 18 sparse_matrix_t A; 19 20 if (!set) { 21 status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a); 22 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 23 (void) mkl_sparse_destroy(A); 24 set = PETSC_TRUE; 25 } 26 return value; 27 } 28 29 typedef struct { 30 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 31 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 32 struct matrix_descr descr; 33 PetscInt *ai1; 34 PetscInt *aj1; 35 } Mat_SeqBAIJMKL; 36 37 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 38 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 39 40 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 41 { 42 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 43 /* so we will ignore 'MatType type'. */ 44 Mat B = *newmat; 45 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 46 47 PetscFunctionBegin; 48 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 49 50 /* Reset the original function pointers. */ 51 B->ops->duplicate = MatDuplicate_SeqBAIJ; 52 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 53 B->ops->destroy = MatDestroy_SeqBAIJ; 54 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 55 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 56 B->ops->scale = MatScale_SeqBAIJ; 57 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 58 B->ops->axpy = MatAXPY_SeqBAIJ; 59 60 switch (A->rmap->bs) { 61 case 1: 62 B->ops->mult = MatMult_SeqBAIJ_1; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 64 break; 65 case 2: 66 B->ops->mult = MatMult_SeqBAIJ_2; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 68 break; 69 case 3: 70 B->ops->mult = MatMult_SeqBAIJ_3; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 72 break; 73 case 4: 74 B->ops->mult = MatMult_SeqBAIJ_4; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 76 break; 77 case 5: 78 B->ops->mult = MatMult_SeqBAIJ_5; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 80 break; 81 case 6: 82 B->ops->mult = MatMult_SeqBAIJ_6; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 84 break; 85 case 7: 86 B->ops->mult = MatMult_SeqBAIJ_7; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 88 break; 89 case 11: 90 B->ops->mult = MatMult_SeqBAIJ_11; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 92 break; 93 case 15: 94 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 95 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 96 break; 97 default: 98 B->ops->mult = MatMult_SeqBAIJ_N; 99 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 100 break; 101 } 102 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL)); 103 104 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 105 * simply involves destroying the MKL sparse matrix handle and then freeing 106 * the spptr pointer. */ 107 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 108 109 if (baijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,baijmkl->bsrA); 110 PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 111 PetscCall(PetscFree(B->spptr)); 112 113 /* Change the type of B to MATSEQBAIJ. */ 114 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 115 116 *newmat = B; 117 PetscFunctionReturn(0); 118 } 119 120 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 121 { 122 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 123 124 PetscFunctionBegin; 125 if (baijmkl) { 126 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 127 if (baijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,baijmkl->bsrA); 128 PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 129 PetscCall(PetscFree(A->spptr)); 130 } 131 132 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 133 * to destroy everything that remains. */ 134 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 135 PetscCall(MatDestroy_SeqBAIJ(A)); 136 PetscFunctionReturn(0); 137 } 138 139 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 143 PetscInt mbs, nbs, nz, bs; 144 MatScalar *aa; 145 PetscInt *aj,*ai; 146 PetscInt i; 147 148 PetscFunctionBegin; 149 if (baijmkl->sparse_optimized) { 150 /* Matrix has been previously assembled and optimized. Must destroy old 151 * matrix handle before running the optimization step again. */ 152 PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1)); 153 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 154 } 155 baijmkl->sparse_optimized = PETSC_FALSE; 156 157 /* Now perform the SpMV2 setup and matrix optimization. */ 158 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 159 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 160 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 161 mbs = a->mbs; 162 nbs = a->nbs; 163 nz = a->nz; 164 bs = A->rmap->bs; 165 aa = a->a; 166 167 if ((nz!=0) & !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 if (PetscSeqBAIJSupportsZeroBased()) { 171 aj = a->j; 172 ai = a->i; 173 PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa)); 174 } else { 175 PetscCall(PetscMalloc2(mbs+1,&ai,nz,&aj)); 176 for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1; 177 for (i=0;i<nz;i++) aj[i] = a->j[i]+1; 178 aa = a->a; 179 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa)); 180 baijmkl->ai1 = ai; 181 baijmkl->aj1 = aj; 182 } 183 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000)); 184 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE)); 185 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 186 baijmkl->sparse_optimized = PETSC_TRUE; 187 } 188 PetscFunctionReturn(0); 189 } 190 191 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 192 { 193 Mat_SeqBAIJMKL *baijmkl; 194 Mat_SeqBAIJMKL *baijmkl_dest; 195 196 PetscFunctionBegin; 197 PetscCall(MatDuplicate_SeqBAIJ(A,op,M)); 198 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 199 PetscCall(PetscNewLog((*M),&baijmkl_dest)); 200 (*M)->spptr = (void*)baijmkl_dest; 201 PetscCall(PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL))); 202 baijmkl_dest->sparse_optimized = PETSC_FALSE; 203 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 208 { 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 210 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 211 const PetscScalar *x; 212 PetscScalar *y; 213 214 PetscFunctionBegin; 215 /* If there are no nonzero entries, zero yy and return immediately. */ 216 if (!a->nz) { 217 PetscCall(VecSet(yy,0.0)); 218 PetscFunctionReturn(0); 219 } 220 221 PetscCall(VecGetArrayRead(xx,&x)); 222 PetscCall(VecGetArray(yy,&y)); 223 224 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 225 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 226 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 227 if (!baijmkl->sparse_optimized) { 228 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 229 } 230 231 /* Call MKL SpMV2 executor routine to do the MatMult. */ 232 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y)); 233 234 PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs)); 235 PetscCall(VecRestoreArrayRead(xx,&x)); 236 PetscCall(VecRestoreArray(yy,&y)); 237 PetscFunctionReturn(0); 238 } 239 240 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 244 const PetscScalar *x; 245 PetscScalar *y; 246 247 PetscFunctionBegin; 248 /* If there are no nonzero entries, zero yy and return immediately. */ 249 if (!a->nz) { 250 PetscCall(VecSet(yy,0.0)); 251 PetscFunctionReturn(0); 252 } 253 254 PetscCall(VecGetArrayRead(xx,&x)); 255 PetscCall(VecGetArray(yy,&y)); 256 257 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 258 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 259 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 260 if (!baijmkl->sparse_optimized) { 261 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 262 } 263 264 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 265 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y)); 266 267 PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs)); 268 PetscCall(VecRestoreArrayRead(xx,&x)); 269 PetscCall(VecRestoreArray(yy,&y)); 270 PetscFunctionReturn(0); 271 } 272 273 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 277 const PetscScalar *x; 278 PetscScalar *y,*z; 279 PetscInt m=a->mbs*A->rmap->bs; 280 PetscInt i; 281 282 PetscFunctionBegin; 283 /* If there are no nonzero entries, set zz = yy and return immediately. */ 284 if (!a->nz) { 285 PetscCall(VecCopy(yy,zz)); 286 PetscFunctionReturn(0); 287 } 288 289 PetscCall(VecGetArrayRead(xx,&x)); 290 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 291 292 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 293 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 294 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 295 if (!baijmkl->sparse_optimized) { 296 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 297 } 298 299 /* Call MKL sparse BLAS routine to do the MatMult. */ 300 if (zz == yy) { 301 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 302 * with alpha and beta both set to 1.0. */ 303 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z)); 304 } else { 305 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 306 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 307 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z)); 308 for (i=0; i<m; i++) { 309 z[i] += y[i]; 310 } 311 } 312 313 PetscCall(PetscLogFlops(2.0*a->bs2*a->nz)); 314 PetscCall(VecRestoreArrayRead(xx,&x)); 315 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 316 PetscFunctionReturn(0); 317 } 318 319 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 320 { 321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 322 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 323 const PetscScalar *x; 324 PetscScalar *y,*z; 325 PetscInt n=a->nbs*A->rmap->bs; 326 PetscInt i; 327 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 328 329 PetscFunctionBegin; 330 /* If there are no nonzero entries, set zz = yy and return immediately. */ 331 if (!a->nz) { 332 PetscCall(VecCopy(yy,zz)); 333 PetscFunctionReturn(0); 334 } 335 336 PetscCall(VecGetArrayRead(xx,&x)); 337 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 338 339 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 340 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 341 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 342 if (!baijmkl->sparse_optimized) { 343 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 344 } 345 346 /* Call MKL sparse BLAS routine to do the MatMult. */ 347 if (zz == yy) { 348 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 349 * with alpha and beta both set to 1.0. */ 350 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z)); 351 } else { 352 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 353 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 354 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z)); 355 for (i=0; i<n; i++) { 356 z[i] += y[i]; 357 } 358 } 359 360 PetscCall(PetscLogFlops(2.0*a->bs2*a->nz)); 361 PetscCall(VecRestoreArrayRead(xx,&x)); 362 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 363 PetscFunctionReturn(0); 364 } 365 366 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 367 { 368 PetscFunctionBegin; 369 PetscCall(MatScale_SeqBAIJ(inA,alpha)); 370 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 371 PetscFunctionReturn(0); 372 } 373 374 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 375 { 376 PetscFunctionBegin; 377 PetscCall(MatDiagonalScale_SeqBAIJ(A,ll,rr)); 378 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 379 PetscFunctionReturn(0); 380 } 381 382 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 383 { 384 PetscFunctionBegin; 385 PetscCall(MatAXPY_SeqBAIJ(Y,a,X,str)); 386 if (str == SAME_NONZERO_PATTERN) { 387 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 388 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 389 } 390 PetscFunctionReturn(0); 391 } 392 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 393 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 394 * routine, but can also be used to convert an assembled SeqBAIJ matrix 395 * into a SeqBAIJMKL one. */ 396 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 397 { 398 Mat B = *newmat; 399 Mat_SeqBAIJMKL *baijmkl; 400 PetscBool sametype; 401 402 PetscFunctionBegin; 403 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 404 405 PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 406 if (sametype) PetscFunctionReturn(0); 407 408 PetscCall(PetscNewLog(B,&baijmkl)); 409 B->spptr = (void*)baijmkl; 410 411 /* Set function pointers for methods that we inherit from BAIJ but override. 412 * We also parse some command line options below, since those determine some of the methods we point to. */ 413 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 414 415 baijmkl->sparse_optimized = PETSC_FALSE; 416 417 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL)); 418 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ)); 419 420 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL)); 421 *newmat = B; 422 PetscFunctionReturn(0); 423 } 424 425 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 426 { 427 PetscFunctionBegin; 428 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 429 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 430 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 431 A->ops->destroy = MatDestroy_SeqBAIJMKL; 432 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 433 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 434 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 435 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 436 A->ops->scale = MatScale_SeqBAIJMKL; 437 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 438 A->ops->axpy = MatAXPY_SeqBAIJMKL; 439 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 440 PetscFunctionReturn(0); 441 } 442 443 /*@C 444 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 445 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 446 routines from Intel MKL whenever possible. 447 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 448 operations are currently supported. 449 If the installed version of MKL supports the "SpMV2" sparse 450 inspector-executor routines, then those are used by default. 451 Default PETSc kernels are used otherwise. 452 453 Input Parameters: 454 + comm - MPI communicator, set to PETSC_COMM_SELF 455 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 456 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 457 . m - number of rows 458 . n - number of columns 459 . nz - number of nonzero blocks per block row (same for all rows) 460 - nnz - array containing the number of nonzero blocks in the various block rows 461 (possibly different for each block row) or NULL 462 463 Output Parameter: 464 . A - the matrix 465 466 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 467 MatXXXXSetPreallocation() paradigm instead of this routine directly. 468 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 469 470 Options Database Keys: 471 + -mat_no_unroll - uses code that does not unroll the loops in the 472 block calculations (much slower) 473 - -mat_block_size - size of the blocks to use 474 475 Level: intermediate 476 477 Notes: 478 The number of rows and columns must be divisible by blocksize. 479 480 If the nnz parameter is given then the nz parameter is ignored 481 482 A nonzero block is any block that as 1 or more nonzeros in it 483 484 The block AIJ format is fully compatible with standard Fortran 77 485 storage. That is, the stored row and column indices can begin at 486 either one (as in Fortran) or zero. See the users' manual for details. 487 488 Specify the preallocated storage with either nz or nnz (not both). 489 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 490 allocation. See Users-Manual: ch_mat for details. 491 matrices. 492 493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 494 495 @*/ 496 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 497 { 498 PetscFunctionBegin; 499 PetscCall(MatCreate(comm,A)); 500 PetscCall(MatSetSizes(*A,m,n,m,n)); 501 PetscCall(MatSetType(*A,MATSEQBAIJMKL)); 502 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz)); 503 PetscFunctionReturn(0); 504 } 505 506 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 507 { 508 PetscFunctionBegin; 509 PetscCall(MatSetType(A,MATSEQBAIJ)); 510 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A)); 511 PetscFunctionReturn(0); 512 } 513