1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30 { 31 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 PetscErrorCode ierr; 34 Mat B = *newmat; 35 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqBAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 45 B->ops->destroy = MatDestroy_SeqBAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 48 B->ops->scale = MatScale_SeqBAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 50 B->ops->axpy = MatAXPY_SeqBAIJ; 51 52 switch (A->rmap->bs) { 53 case 1: 54 B->ops->mult = MatMult_SeqBAIJ_1; 55 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 56 break; 57 case 2: 58 B->ops->mult = MatMult_SeqBAIJ_2; 59 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 60 break; 61 case 3: 62 B->ops->mult = MatMult_SeqBAIJ_3; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 64 break; 65 case 4: 66 B->ops->mult = MatMult_SeqBAIJ_4; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 68 break; 69 case 5: 70 B->ops->mult = MatMult_SeqBAIJ_5; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 72 break; 73 case 6: 74 B->ops->mult = MatMult_SeqBAIJ_6; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 76 break; 77 case 7: 78 B->ops->mult = MatMult_SeqBAIJ_7; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 80 break; 81 case 11: 82 B->ops->mult = MatMult_SeqBAIJ_11; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 84 break; 85 case 15: 86 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 88 break; 89 default: 90 B->ops->mult = MatMult_SeqBAIJ_N; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 92 break; 93 } 94 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 95 96 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 97 * simply involves destroying the MKL sparse matrix handle and then freeing 98 * the spptr pointer. */ 99 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 100 101 if (baijmkl->sparse_optimized) { 102 sparse_status_t stat; 103 stat = mkl_sparse_destroy(baijmkl->bsrA); 104 if (stat != SPARSE_STATUS_SUCCESS) { 105 PetscFunctionReturn(PETSC_ERR_LIB); 106 } 107 } 108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 109 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 110 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 111 #endif 112 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 113 114 /* Change the type of B to MATSEQBAIJ. */ 115 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 116 117 *newmat = B; 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 122 { 123 PetscErrorCode ierr; 124 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 125 126 PetscFunctionBegin; 127 128 if (baijmkl) { 129 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 130 if (baijmkl->sparse_optimized) { 131 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 132 stat = mkl_sparse_destroy(baijmkl->bsrA); 133 if (stat != SPARSE_STATUS_SUCCESS) { 134 PetscFunctionReturn(PETSC_ERR_LIB); 135 } 136 } 137 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 138 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 139 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 140 #endif 141 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 142 } 143 144 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 145 * to destroy everything that remains. */ 146 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 147 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 155 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 156 PetscInt mbs, nbs, nz, bs, i; 157 MatScalar *aa; 158 PetscInt *aj,*ai; 159 sparse_status_t stat; 160 161 PetscFunctionBegin; 162 if (baijmkl->sparse_optimized) { 163 /* Matrix has been previously assembled and optimized. Must destroy old 164 * matrix handle before running the optimization step again. */ 165 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 166 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 167 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 168 #endif 169 stat = mkl_sparse_destroy(baijmkl->bsrA); 170 if (stat != SPARSE_STATUS_SUCCESS) { 171 PetscFunctionReturn(PETSC_ERR_LIB); 172 } 173 } 174 baijmkl->sparse_optimized = PETSC_FALSE; 175 176 /* Now perform the SpMV2 setup and matrix optimization. */ 177 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 178 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 179 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 180 mbs = a->mbs; 181 nbs = a->nbs; 182 nz = a->nz; 183 bs = A->rmap->bs; 184 aa = a->a; 185 186 if (nz) { 187 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 188 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 189 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 190 aj = a->j; 191 ai = a->i; 192 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 193 #else 194 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 195 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 196 for (i=0;i<mbs+1;i++) 197 ai[i]=a->i[i]+1; 198 for (i=0;i<nz;i++) 199 aj[i]=a->j[i]+1; 200 aa = a->a; 201 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa); 202 baijmkl->ai1 = ai; 203 baijmkl->aj1 = aj; 204 #endif 205 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000); 206 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE); 207 stat = mkl_sparse_optimize(baijmkl->bsrA); 208 if (stat != SPARSE_STATUS_SUCCESS) { 209 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 210 PetscFunctionReturn(PETSC_ERR_LIB); 211 } 212 baijmkl->sparse_optimized = PETSC_TRUE; 213 } 214 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 219 { 220 PetscErrorCode ierr; 221 Mat_SeqBAIJMKL *baijmkl; 222 Mat_SeqBAIJMKL *baijmkl_dest; 223 224 PetscFunctionBegin; 225 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 226 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 227 ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr); 228 (*M)->spptr = (void*)baijmkl_dest; 229 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 230 baijmkl_dest->sparse_optimized = PETSC_FALSE; 231 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 239 const PetscScalar *x; 240 PetscScalar *y; 241 PetscErrorCode ierr; 242 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 243 244 PetscFunctionBegin; 245 246 /* If there are no nonzero entries, zero yy and return immediately. */ 247 if(!a->nz) { 248 PetscInt i; 249 PetscInt m=a->mbs*A->rmap->bs; 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 for (i=0; i<m; i++) { 252 y[i] = 0.0; 253 } 254 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 260 261 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 262 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 263 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 264 if (!baijmkl->sparse_optimized) { 265 MatSeqBAIJMKL_create_mkl_handle(A); 266 } 267 268 /* Call MKL SpMV2 executor routine to do the MatMult. */ 269 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 270 271 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 272 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 273 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 274 if (stat != SPARSE_STATUS_SUCCESS) { 275 PetscFunctionReturn(PETSC_ERR_LIB); 276 } 277 PetscFunctionReturn(0); 278 } 279 280 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 281 { 282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 283 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 284 const PetscScalar *x; 285 PetscScalar *y; 286 PetscErrorCode ierr; 287 sparse_status_t stat; 288 289 PetscFunctionBegin; 290 291 /* If there are no nonzero entries, zero yy and return immediately. */ 292 if(!a->nz) { 293 PetscInt i; 294 PetscInt n=a->nbs; 295 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 296 for (i=0; i<n; i++) { 297 y[i] = 0.0; 298 } 299 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 305 306 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 307 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 308 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 309 if (!baijmkl->sparse_optimized) { 310 MatSeqBAIJMKL_create_mkl_handle(A); 311 } 312 313 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 314 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y); 315 316 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 319 if (stat != SPARSE_STATUS_SUCCESS) { 320 PetscFunctionReturn(PETSC_ERR_LIB); 321 } 322 PetscFunctionReturn(0); 323 } 324 325 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 326 { 327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 328 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 329 const PetscScalar *x; 330 PetscScalar *y,*z; 331 PetscErrorCode ierr; 332 PetscInt m=a->mbs*A->rmap->bs; 333 PetscInt i; 334 335 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 336 337 PetscFunctionBegin; 338 339 /* If there are no nonzero entries, set zz = yy and return immediately. */ 340 if(!a->nz) { 341 PetscInt i; 342 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 343 for (i=0; i<m; i++) { 344 z[i] = y[i]; 345 } 346 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 352 353 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 354 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 355 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 356 if (!baijmkl->sparse_optimized) { 357 MatSeqBAIJMKL_create_mkl_handle(A); 358 } 359 360 /* Call MKL sparse BLAS routine to do the MatMult. */ 361 if (zz == yy) { 362 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 363 * with alpha and beta both set to 1.0. */ 364 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 365 } else { 366 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 367 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 368 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 369 for (i=0; i<m; i++) { 370 z[i] += y[i]; 371 } 372 } 373 374 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 375 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 376 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 377 if (stat != SPARSE_STATUS_SUCCESS) { 378 PetscFunctionReturn(PETSC_ERR_LIB); 379 } 380 PetscFunctionReturn(0); 381 } 382 383 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 384 { 385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 386 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 387 const PetscScalar *x; 388 PetscScalar *y,*z; 389 PetscErrorCode ierr; 390 PetscInt n=a->nbs*A->rmap->bs; 391 PetscInt i; 392 393 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 394 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 395 396 PetscFunctionBegin; 397 398 /* If there are no nonzero entries, set zz = yy and return immediately. */ 399 if(!a->nz) { 400 PetscInt i; 401 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 402 for (i=0; i<n; i++) { 403 z[i] = y[i]; 404 } 405 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 410 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 411 412 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 413 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 414 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 415 if (!baijmkl->sparse_optimized) { 416 MatSeqBAIJMKL_create_mkl_handle(A); 417 } 418 419 /* Call MKL sparse BLAS routine to do the MatMult. */ 420 if (zz == yy) { 421 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 422 * with alpha and beta both set to 1.0. */ 423 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z); 424 } else { 425 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 426 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 427 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z); 428 for (i=0; i<n; i++) { 429 z[i] += y[i]; 430 } 431 } 432 433 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 435 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 436 if (stat != SPARSE_STATUS_SUCCESS) { 437 PetscFunctionReturn(PETSC_ERR_LIB); 438 } 439 PetscFunctionReturn(0); 440 } 441 442 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 443 { 444 PetscErrorCode ierr; 445 446 PetscFunctionBegin; 447 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 448 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 453 { 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 458 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 463 { 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 468 if (str == SAME_NONZERO_PATTERN) { 469 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 470 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 471 } 472 PetscFunctionReturn(0); 473 } 474 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 475 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 476 * routine, but can also be used to convert an assembled SeqBAIJ matrix 477 * into a SeqBAIJMKL one. */ 478 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 479 { 480 PetscErrorCode ierr; 481 Mat B = *newmat; 482 Mat_SeqBAIJMKL *baijmkl; 483 PetscBool sametype; 484 485 PetscFunctionBegin; 486 if (reuse == MAT_INITIAL_MATRIX) { 487 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 488 } 489 490 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 491 if (sametype) PetscFunctionReturn(0); 492 493 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 494 B->spptr = (void*) baijmkl; 495 496 /* Set function pointers for methods that we inherit from BAIJ but override. 497 * We also parse some command line options below, since those determine some of the methods we point to. */ 498 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 499 500 baijmkl->sparse_optimized = PETSC_FALSE; 501 502 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 503 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 504 505 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 506 *newmat = B; 507 PetscFunctionReturn(0); 508 } 509 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 510 { 511 PetscErrorCode ierr; 512 PetscFunctionBegin; 513 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 514 515 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 516 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 517 A->ops->destroy = MatDestroy_SeqBAIJMKL; 518 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 519 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 520 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 521 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 522 A->ops->scale = MatScale_SeqBAIJMKL; 523 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 524 A->ops->axpy = MatAXPY_SeqBAIJMKL; 525 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 526 PetscFunctionReturn(0); 527 } 528 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 529 /*@C 530 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 531 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 532 routines from Intel MKL whenever possible. 533 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 534 operations are currently supported. 535 If the installed version of MKL supports the "SpMV2" sparse 536 inspector-executor routines, then those are used by default. 537 Default PETSc kernels are used otherwise. 538 539 Input Parameters: 540 + comm - MPI communicator, set to PETSC_COMM_SELF 541 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 542 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 543 . m - number of rows 544 . n - number of columns 545 . nz - number of nonzero blocks per block row (same for all rows) 546 - nnz - array containing the number of nonzero blocks in the various block rows 547 (possibly different for each block row) or NULL 548 549 550 Output Parameter: 551 . A - the matrix 552 553 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 554 MatXXXXSetPreallocation() paradgm instead of this routine directly. 555 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 556 557 Options Database Keys: 558 . -mat_no_unroll - uses code that does not unroll the loops in the 559 block calculations (much slower) 560 . -mat_block_size - size of the blocks to use 561 562 Level: intermediate 563 564 Notes: 565 The number of rows and columns must be divisible by blocksize. 566 567 If the nnz parameter is given then the nz parameter is ignored 568 569 A nonzero block is any block that as 1 or more nonzeros in it 570 571 The block AIJ format is fully compatible with standard Fortran 77 572 storage. That is, the stored row and column indices can begin at 573 either one (as in Fortran) or zero. See the users' manual for details. 574 575 Specify the preallocated storage with either nz or nnz (not both). 576 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 577 allocation. See Users-Manual: ch_mat for details. 578 matrices. 579 580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 581 582 @*/ 583 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 584 { 585 PetscErrorCode ierr; 586 587 PetscFunctionBegin; 588 ierr = MatCreate(comm,A);CHKERRQ(ierr); 589 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 590 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 591 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 592 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 593 #else 594 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 595 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 596 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 597 #endif 598 PetscFunctionReturn(0); 599 } 600 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 601 { 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 606 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 607 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 608 #else 609 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 610 #endif 611 PetscFunctionReturn(0); 612 } 613