1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30 { 31 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 PetscErrorCode ierr; 34 Mat B = *newmat; 35 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqBAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 45 B->ops->destroy = MatDestroy_SeqBAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 48 B->ops->scale = MatScale_SeqBAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 50 B->ops->axpy = MatAXPY_SeqBAIJ; 51 52 switch (A->rmap->bs) { 53 case 1: 54 B->ops->mult = MatMult_SeqBAIJ_1; 55 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 56 break; 57 case 2: 58 B->ops->mult = MatMult_SeqBAIJ_2; 59 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 60 break; 61 case 3: 62 B->ops->mult = MatMult_SeqBAIJ_3; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 64 break; 65 case 4: 66 B->ops->mult = MatMult_SeqBAIJ_4; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 68 break; 69 case 5: 70 B->ops->mult = MatMult_SeqBAIJ_5; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 72 break; 73 case 6: 74 B->ops->mult = MatMult_SeqBAIJ_6; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 76 break; 77 case 7: 78 B->ops->mult = MatMult_SeqBAIJ_7; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 80 break; 81 case 11: 82 B->ops->mult = MatMult_SeqBAIJ_11; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 84 break; 85 case 15: 86 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 88 break; 89 default: 90 B->ops->mult = MatMult_SeqBAIJ_N; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 92 break; 93 } 94 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 95 96 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 97 * simply involves destroying the MKL sparse matrix handle and then freeing 98 * the spptr pointer. */ 99 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 100 101 if (baijmkl->sparse_optimized) { 102 sparse_status_t stat; 103 stat = mkl_sparse_destroy(baijmkl->bsrA); 104 if (stat != SPARSE_STATUS_SUCCESS) { 105 PetscFunctionReturn(PETSC_ERR_LIB); 106 } 107 } 108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 109 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 110 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 111 #endif 112 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 113 114 /* Change the type of B to MATSEQBAIJ. */ 115 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 116 117 *newmat = B; 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 122 { 123 PetscErrorCode ierr; 124 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 125 126 PetscFunctionBegin; 127 128 if (baijmkl) { 129 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 130 if (baijmkl->sparse_optimized) { 131 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 132 stat = mkl_sparse_destroy(baijmkl->bsrA); 133 if (stat != SPARSE_STATUS_SUCCESS) { 134 PetscFunctionReturn(PETSC_ERR_LIB); 135 } 136 } 137 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 138 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 139 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 140 #endif 141 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 142 } 143 144 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 145 * to destroy everything that remains. */ 146 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 147 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 155 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 156 PetscInt mbs, nbs, nz, bs, i; 157 MatScalar *aa; 158 PetscInt *aj,*ai; 159 sparse_status_t stat; 160 161 PetscFunctionBegin; 162 if (baijmkl->sparse_optimized) { 163 /* Matrix has been previously assembled and optimized. Must destroy old 164 * matrix handle before running the optimization step again. */ 165 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 166 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 167 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 168 #endif 169 stat = mkl_sparse_destroy(baijmkl->bsrA); 170 if (stat != SPARSE_STATUS_SUCCESS) { 171 PetscFunctionReturn(PETSC_ERR_LIB); 172 } 173 } 174 baijmkl->sparse_optimized = PETSC_FALSE; 175 176 /* Now perform the SpMV2 setup and matrix optimization. */ 177 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 178 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 179 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 180 mbs = a->mbs; 181 nbs = a->nbs; 182 nz = a->nz; 183 bs = A->rmap->bs; 184 aa = a->a; 185 186 if (nz) { 187 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 188 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 189 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 190 aj = a->j; 191 ai = a->i; 192 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 193 #else 194 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 195 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 196 for (i=0;i<mbs+1;i++) 197 ai[i]=a->i[i]+1; 198 for (i=0;i<nz;i++) 199 aj[i]=a->j[i]+1; 200 aa = a->a; 201 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 202 baijmkl->ai1 = ai; 203 baijmkl->aj1 = aj; 204 #endif 205 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000); 206 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE); 207 stat = mkl_sparse_optimize(baijmkl->bsrA); 208 if (stat != SPARSE_STATUS_SUCCESS) { 209 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 210 PetscFunctionReturn(PETSC_ERR_LIB); 211 } 212 baijmkl->sparse_optimized = PETSC_TRUE; 213 } 214 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 219 { 220 PetscErrorCode ierr; 221 Mat_SeqBAIJMKL *baijmkl; 222 Mat_SeqBAIJMKL *baijmkl_dest; 223 224 PetscFunctionBegin; 225 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 226 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 227 baijmkl_dest = (Mat_SeqBAIJMKL*) (*M)->spptr; 228 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 229 baijmkl_dest->sparse_optimized = PETSC_FALSE; 230 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 235 { 236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 237 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 238 const PetscScalar *x; 239 PetscScalar *y; 240 PetscErrorCode ierr; 241 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 242 243 PetscFunctionBegin; 244 245 /* If there are no nonzero entries, zero yy and return immediately. */ 246 if(!a->nz) { 247 PetscInt i; 248 PetscInt m=a->mbs*A->rmap->bs; 249 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 250 for (i=0; i<m; i++) { 251 y[i] = 0.0; 252 } 253 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 254 PetscFunctionReturn(0); 255 } 256 257 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 258 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 259 260 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 261 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 262 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 263 if (!baijmkl->sparse_optimized) { 264 MatSeqBAIJMKL_create_mkl_handle(A); 265 } 266 267 /* Call MKL SpMV2 executor routine to do the MatMult. */ 268 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 269 270 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 272 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 273 if (stat != SPARSE_STATUS_SUCCESS) { 274 PetscFunctionReturn(PETSC_ERR_LIB); 275 } 276 PetscFunctionReturn(0); 277 } 278 279 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 280 { 281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 282 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 283 const PetscScalar *x; 284 PetscScalar *y; 285 PetscErrorCode ierr; 286 sparse_status_t stat; 287 288 PetscFunctionBegin; 289 290 /* If there are no nonzero entries, zero yy and return immediately. */ 291 if(!a->nz) { 292 PetscInt i; 293 PetscInt n=a->nbs; 294 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 295 for (i=0; i<n; i++) { 296 y[i] = 0.0; 297 } 298 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 303 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 304 305 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 306 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 307 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 308 if (!baijmkl->sparse_optimized) { 309 MatSeqBAIJMKL_create_mkl_handle(A); 310 } 311 312 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 313 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 314 315 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 316 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 317 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 318 if (stat != SPARSE_STATUS_SUCCESS) { 319 PetscFunctionReturn(PETSC_ERR_LIB); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 328 const PetscScalar *x; 329 PetscScalar *y,*z; 330 PetscErrorCode ierr; 331 PetscInt m=a->mbs*A->rmap->bs; 332 PetscInt i; 333 334 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 335 336 PetscFunctionBegin; 337 338 /* If there are no nonzero entries, set zz = yy and return immediately. */ 339 if(!a->nz) { 340 PetscInt i; 341 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 342 for (i=0; i<m; i++) { 343 z[i] = y[i]; 344 } 345 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 350 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 351 352 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 353 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 354 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 355 if (!baijmkl->sparse_optimized) { 356 MatSeqBAIJMKL_create_mkl_handle(A); 357 } 358 359 /* Call MKL sparse BLAS routine to do the MatMult. */ 360 if (zz == yy) { 361 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 362 * with alpha and beta both set to 1.0. */ 363 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 364 } else { 365 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 366 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 367 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 368 for (i=0; i<m; i++) { 369 z[i] += y[i]; 370 } 371 } 372 373 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 375 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 376 if (stat != SPARSE_STATUS_SUCCESS) { 377 PetscFunctionReturn(PETSC_ERR_LIB); 378 } 379 PetscFunctionReturn(0); 380 } 381 382 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 383 { 384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 385 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 386 const PetscScalar *x; 387 PetscScalar *y,*z; 388 PetscErrorCode ierr; 389 PetscInt n=a->nbs*A->rmap->bs; 390 PetscInt i; 391 392 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 393 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 394 395 PetscFunctionBegin; 396 397 /* If there are no nonzero entries, set zz = yy and return immediately. */ 398 if(!a->nz) { 399 PetscInt i; 400 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 401 for (i=0; i<n; i++) { 402 z[i] = y[i]; 403 } 404 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 409 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 410 411 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 412 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 413 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 414 if (!baijmkl->sparse_optimized) { 415 MatSeqBAIJMKL_create_mkl_handle(A); 416 } 417 418 /* Call MKL sparse BLAS routine to do the MatMult. */ 419 if (zz == yy) { 420 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 421 * with alpha and beta both set to 1.0. */ 422 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 423 } else { 424 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 425 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 426 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 427 for (i=0; i<n; i++) { 428 z[i] += y[i]; 429 } 430 } 431 432 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 434 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 435 if (stat != SPARSE_STATUS_SUCCESS) { 436 PetscFunctionReturn(PETSC_ERR_LIB); 437 } 438 PetscFunctionReturn(0); 439 } 440 441 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 442 { 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 447 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 452 { 453 PetscErrorCode ierr; 454 455 PetscFunctionBegin; 456 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 457 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 462 { 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 467 if (str == SAME_NONZERO_PATTERN) { 468 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 469 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 470 } 471 PetscFunctionReturn(0); 472 } 473 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 474 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 475 * routine, but can also be used to convert an assembled SeqBAIJ matrix 476 * into a SeqBAIJMKL one. */ 477 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 478 { 479 PetscErrorCode ierr; 480 Mat B = *newmat; 481 Mat_SeqBAIJMKL *baijmkl; 482 PetscBool sametype; 483 484 PetscFunctionBegin; 485 if (reuse == MAT_INITIAL_MATRIX) { 486 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 487 } 488 489 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 490 if (sametype) PetscFunctionReturn(0); 491 492 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 493 B->spptr = (void*) baijmkl; 494 495 /* Set function pointers for methods that we inherit from BAIJ but override. 496 * We also parse some command line options below, since those determine some of the methods we point to. */ 497 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 498 499 baijmkl->sparse_optimized = PETSC_FALSE; 500 501 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 502 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 503 504 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 505 *newmat = B; 506 PetscFunctionReturn(0); 507 } 508 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 509 { 510 PetscErrorCode ierr; 511 PetscFunctionBegin; 512 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 513 514 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 515 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 516 A->ops->destroy = MatDestroy_SeqBAIJMKL; 517 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 518 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 519 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 520 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 521 A->ops->scale = MatScale_SeqBAIJMKL; 522 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 523 A->ops->axpy = MatAXPY_SeqBAIJMKL; 524 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 525 PetscFunctionReturn(0); 526 } 527 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 528 /*@C 529 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 530 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 531 routines from Intel MKL whenever possible. 532 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 533 operations are currently supported. 534 If the installed version of MKL supports the "SpMV2" sparse 535 inspector-executor routines, then those are used by default. 536 Default PETSc kernels are used otherwise. 537 538 Input Parameters: 539 + comm - MPI communicator, set to PETSC_COMM_SELF 540 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 541 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 542 . m - number of rows 543 . n - number of columns 544 . nz - number of nonzero blocks per block row (same for all rows) 545 - nnz - array containing the number of nonzero blocks in the various block rows 546 (possibly different for each block row) or NULL 547 548 549 Output Parameter: 550 . A - the matrix 551 552 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 553 MatXXXXSetPreallocation() paradgm instead of this routine directly. 554 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 555 556 Options Database Keys: 557 . -mat_no_unroll - uses code that does not unroll the loops in the 558 block calculations (much slower) 559 . -mat_block_size - size of the blocks to use 560 561 Level: intermediate 562 563 Notes: 564 The number of rows and columns must be divisible by blocksize. 565 566 If the nnz parameter is given then the nz parameter is ignored 567 568 A nonzero block is any block that as 1 or more nonzeros in it 569 570 The block AIJ format is fully compatible with standard Fortran 77 571 storage. That is, the stored row and column indices can begin at 572 either one (as in Fortran) or zero. See the users' manual for details. 573 574 Specify the preallocated storage with either nz or nnz (not both). 575 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 576 allocation. See Users-Manual: ch_mat for details. 577 matrices. 578 579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 580 581 @*/ 582 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 583 { 584 PetscErrorCode ierr; 585 586 PetscFunctionBegin; 587 ierr = MatCreate(comm,A);CHKERRQ(ierr); 588 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 589 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 590 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 591 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 592 #else 593 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 594 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 595 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 596 #endif 597 PetscFunctionReturn(0); 598 } 599 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 600 { 601 PetscErrorCode ierr; 602 603 PetscFunctionBegin; 604 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 605 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 606 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 607 #else 608 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 609 #endif 610 PetscFunctionReturn(0); 611 } 612