1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 } 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqBAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 44 B->ops->destroy = MatDestroy_SeqBAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 47 B->ops->scale = MatScale_SeqBAIJ; 48 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 49 B->ops->axpy = MatAXPY_SeqBAIJ; 50 51 switch (A->rmap->bs) { 52 case 1: 53 B->ops->mult = MatMult_SeqBAIJ_1; 54 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 55 break; 56 case 2: 57 B->ops->mult = MatMult_SeqBAIJ_2; 58 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 59 break; 60 case 3: 61 B->ops->mult = MatMult_SeqBAIJ_3; 62 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 63 break; 64 case 4: 65 B->ops->mult = MatMult_SeqBAIJ_4; 66 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 67 break; 68 case 5: 69 B->ops->mult = MatMult_SeqBAIJ_5; 70 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 71 break; 72 case 6: 73 B->ops->mult = MatMult_SeqBAIJ_6; 74 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 75 break; 76 case 7: 77 B->ops->mult = MatMult_SeqBAIJ_7; 78 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 79 break; 80 case 11: 81 B->ops->mult = MatMult_SeqBAIJ_11; 82 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 83 break; 84 case 15: 85 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 86 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 87 break; 88 default: 89 B->ops->mult = MatMult_SeqBAIJ_N; 90 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 91 break; 92 } 93 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 94 95 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 96 * simply involves destroying the MKL sparse matrix handle and then freeing 97 * the spptr pointer. */ 98 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 99 100 if (baijmkl->sparse_optimized) { 101 sparse_status_t stat; 102 stat = mkl_sparse_destroy(baijmkl->bsrA); 103 if (stat != SPARSE_STATUS_SUCCESS) { 104 PetscFunctionReturn(PETSC_ERR_LIB); 105 } 106 } 107 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 108 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 109 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 110 #endif 111 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 112 113 /* Change the type of B to MATSEQBAIJ. */ 114 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 115 116 *newmat = B; 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 121 { 122 PetscErrorCode ierr; 123 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 124 125 PetscFunctionBegin; 126 127 if (baijmkl) { 128 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 129 if (baijmkl->sparse_optimized) { 130 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 131 stat = mkl_sparse_destroy(baijmkl->bsrA); 132 if (stat != SPARSE_STATUS_SUCCESS) { 133 PetscFunctionReturn(PETSC_ERR_LIB); 134 } 135 } 136 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 137 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 138 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 139 #endif 140 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 141 } 142 143 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 144 * to destroy everything that remains. */ 145 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 146 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 155 PetscInt mbs, nbs, nz, bs, i; 156 MatScalar *aa; 157 PetscInt *aj,*ai; 158 sparse_status_t stat; 159 160 PetscFunctionBegin; 161 if (baijmkl->sparse_optimized) { 162 /* Matrix has been previously assembled and optimized. Must destroy old 163 * matrix handle before running the optimization step again. */ 164 stat = mkl_sparse_destroy(baijmkl->bsrA); 165 if (stat != SPARSE_STATUS_SUCCESS) { 166 PetscFunctionReturn(PETSC_ERR_LIB); 167 } 168 } 169 baijmkl->sparse_optimized = PETSC_FALSE; 170 171 /* Now perform the SpMV2 setup and matrix optimization. */ 172 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 173 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 174 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 175 mbs = a->mbs; 176 nbs = a->nbs; 177 nz = a->nz; 178 bs = A->rmap->bs; 179 aa = a->a; 180 181 if (nz) { 182 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 183 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 184 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 185 aj = a->j; 186 ai = a->i; 187 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 188 #else 189 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 190 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 191 for (i=0;i<mbs+1;i++) 192 ai[i]=a->i[i]+1; 193 for (i=0;i<nz;i++) 194 aj[i]=a->j[i]+1; 195 aa = a->a; 196 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 197 baijmkl->ai1 = ai; 198 baijmkl->aj1 = aj; 199 #endif 200 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000); 201 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE); 202 stat = mkl_sparse_optimize(baijmkl->bsrA); 203 if (stat != SPARSE_STATUS_SUCCESS) { 204 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 205 PetscFunctionReturn(PETSC_ERR_LIB); 206 } 207 baijmkl->sparse_optimized = PETSC_TRUE; 208 } 209 210 PetscFunctionReturn(0); 211 } 212 213 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 214 { 215 PetscErrorCode ierr; 216 Mat_SeqBAIJMKL *baijmkl; 217 Mat_SeqBAIJMKL *baijmkl_dest; 218 219 PetscFunctionBegin; 220 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 221 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 222 baijmkl_dest = (Mat_SeqBAIJMKL*) (*M)->spptr; 223 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 224 baijmkl_dest->sparse_optimized = PETSC_FALSE; 225 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 230 { 231 PetscErrorCode ierr; 232 PetscFunctionBegin; 233 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 234 235 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 236 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 237 238 PetscFunctionReturn(0); 239 } 240 241 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 245 const PetscScalar *x; 246 PetscScalar *y; 247 PetscErrorCode ierr; 248 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 249 250 PetscFunctionBegin; 251 252 /* If there are no nonzero entries, zero yy and return immediately. */ 253 if(!a->nz) { 254 PetscInt i; 255 PetscInt m=a->mbs*A->rmap->bs; 256 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 257 for (i=0; i<m; i++) { 258 y[i] = 0.0; 259 } 260 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 266 267 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 268 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 269 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 270 if (!baijmkl->sparse_optimized) { 271 MatSeqBAIJMKL_create_mkl_handle(A); 272 } 273 274 /* Call MKL SpMV2 executor routine to do the MatMult. */ 275 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 276 277 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 278 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 if (stat != SPARSE_STATUS_SUCCESS) { 281 PetscFunctionReturn(PETSC_ERR_LIB); 282 } 283 PetscFunctionReturn(0); 284 } 285 286 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 287 { 288 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 289 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 290 const PetscScalar *x; 291 PetscScalar *y; 292 PetscErrorCode ierr; 293 sparse_status_t stat; 294 295 PetscFunctionBegin; 296 297 /* If there are no nonzero entries, zero yy and return immediately. */ 298 if(!a->nz) { 299 PetscInt i; 300 PetscInt n=a->nbs; 301 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 302 for (i=0; i<n; i++) { 303 y[i] = 0.0; 304 } 305 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 310 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 311 312 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 313 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 314 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 315 if (!baijmkl->sparse_optimized) { 316 MatSeqBAIJMKL_create_mkl_handle(A); 317 } 318 319 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 320 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 321 322 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 323 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 325 if (stat != SPARSE_STATUS_SUCCESS) { 326 PetscFunctionReturn(PETSC_ERR_LIB); 327 } 328 PetscFunctionReturn(0); 329 } 330 331 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 332 { 333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 335 const PetscScalar *x; 336 PetscScalar *y,*z; 337 PetscErrorCode ierr; 338 PetscInt m=a->mbs*A->rmap->bs; 339 PetscInt i; 340 341 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 342 343 PetscFunctionBegin; 344 345 /* If there are no nonzero entries, set zz = yy and return immediately. */ 346 if(!a->nz) { 347 PetscInt i; 348 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 349 for (i=0; i<m; i++) { 350 z[i] = y[i]; 351 } 352 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 358 359 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 360 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 361 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 362 if (!baijmkl->sparse_optimized) { 363 MatSeqBAIJMKL_create_mkl_handle(A); 364 } 365 366 /* Call MKL sparse BLAS routine to do the MatMult. */ 367 if (zz == yy) { 368 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 369 * with alpha and beta both set to 1.0. */ 370 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 371 } else { 372 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 373 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 374 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 375 for (i=0; i<m; i++) { 376 z[i] += y[i]; 377 } 378 } 379 380 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 381 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 382 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 383 if (stat != SPARSE_STATUS_SUCCESS) { 384 PetscFunctionReturn(PETSC_ERR_LIB); 385 } 386 PetscFunctionReturn(0); 387 } 388 389 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 390 { 391 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 392 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 393 const PetscScalar *x; 394 PetscScalar *y,*z; 395 PetscErrorCode ierr; 396 PetscInt n=a->nbs*A->rmap->bs; 397 PetscInt i; 398 399 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 400 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 401 402 PetscFunctionBegin; 403 404 /* If there are no nonzero entries, set zz = yy and return immediately. */ 405 if(!a->nz) { 406 PetscInt i; 407 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 408 for (i=0; i<n; i++) { 409 z[i] = y[i]; 410 } 411 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 416 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 417 418 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 419 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 420 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 421 if (!baijmkl->sparse_optimized) { 422 MatSeqBAIJMKL_create_mkl_handle(A); 423 } 424 425 /* Call MKL sparse BLAS routine to do the MatMult. */ 426 if (zz == yy) { 427 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 428 * with alpha and beta both set to 1.0. */ 429 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 430 } else { 431 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 432 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 433 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 434 for (i=0; i<n; i++) { 435 z[i] += y[i]; 436 } 437 } 438 439 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 441 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 442 if (stat != SPARSE_STATUS_SUCCESS) { 443 PetscFunctionReturn(PETSC_ERR_LIB); 444 } 445 PetscFunctionReturn(0); 446 } 447 448 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 449 { 450 PetscErrorCode ierr; 451 452 PetscFunctionBegin; 453 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 454 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 459 { 460 PetscErrorCode ierr; 461 462 PetscFunctionBegin; 463 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 464 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 469 { 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 474 if (str == SAME_NONZERO_PATTERN) { 475 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 476 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 477 } 478 PetscFunctionReturn(0); 479 } 480 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 481 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 482 * routine, but can also be used to convert an assembled SeqBAIJ matrix 483 * into a SeqBAIJMKL one. */ 484 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 485 { 486 PetscErrorCode ierr; 487 Mat B = *newmat; 488 Mat_SeqBAIJMKL *baijmkl; 489 PetscBool sametype; 490 491 PetscFunctionBegin; 492 if (reuse == MAT_INITIAL_MATRIX) { 493 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 494 } 495 496 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 497 if (sametype) PetscFunctionReturn(0); 498 499 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 500 B->spptr = (void*) baijmkl; 501 502 /* Set function pointers for methods that we inherit from BAIJ but override. 503 * We also parse some command line options below, since those determine some of the methods we point to. */ 504 B->ops->duplicate = MatDuplicate_SeqBAIJMKL; 505 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 506 B->ops->destroy = MatDestroy_SeqBAIJMKL; 507 B->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 508 B->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 509 B->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 510 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 511 B->ops->scale = MatScale_SeqBAIJMKL; 512 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 513 B->ops->axpy = MatAXPY_SeqBAIJMKL; 514 515 baijmkl->sparse_optimized = PETSC_FALSE; 516 517 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 518 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 519 520 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 521 *newmat = B; 522 PetscFunctionReturn(0); 523 } 524 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 525 /*@C 526 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 527 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 528 routines from Intel MKL whenever possible. 529 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 530 operations are currently supported. 531 If the installed version of MKL supports the "SpMV2" sparse 532 inspector-executor routines, then those are used by default. 533 Default PETSc kernels are used otherwise. 534 535 Input Parameters: 536 + comm - MPI communicator, set to PETSC_COMM_SELF 537 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 538 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 539 . m - number of rows 540 . n - number of columns 541 . nz - number of nonzero blocks per block row (same for all rows) 542 - nnz - array containing the number of nonzero blocks in the various block rows 543 (possibly different for each block row) or NULL 544 545 546 Output Parameter: 547 . A - the matrix 548 549 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 550 MatXXXXSetPreallocation() paradgm instead of this routine directly. 551 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 552 553 Options Database Keys: 554 . -mat_no_unroll - uses code that does not unroll the loops in the 555 block calculations (much slower) 556 . -mat_block_size - size of the blocks to use 557 558 Level: intermediate 559 560 Notes: 561 The number of rows and columns must be divisible by blocksize. 562 563 If the nnz parameter is given then the nz parameter is ignored 564 565 A nonzero block is any block that as 1 or more nonzeros in it 566 567 The block AIJ format is fully compatible with standard Fortran 77 568 storage. That is, the stored row and column indices can begin at 569 either one (as in Fortran) or zero. See the users' manual for details. 570 571 Specify the preallocated storage with either nz or nnz (not both). 572 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 573 allocation. See Users-Manual: ch_mat for details. 574 matrices. 575 576 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 577 578 @*/ 579 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 580 { 581 PetscErrorCode ierr; 582 583 PetscFunctionBegin; 584 ierr = MatCreate(comm,A);CHKERRQ(ierr); 585 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 586 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 587 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 588 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 589 #else 590 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 591 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 592 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 593 #endif 594 PetscFunctionReturn(0); 595 } 596 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 597 { 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 602 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 603 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 604 #else 605 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 606 #endif 607 PetscFunctionReturn(0); 608 } 609