1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30 { 31 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 PetscErrorCode ierr; 34 Mat B = *newmat; 35 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqBAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 45 B->ops->destroy = MatDestroy_SeqBAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 48 B->ops->scale = MatScale_SeqBAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 50 B->ops->axpy = MatAXPY_SeqBAIJ; 51 52 switch (A->rmap->bs) { 53 case 1: 54 B->ops->mult = MatMult_SeqBAIJ_1; 55 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 56 break; 57 case 2: 58 B->ops->mult = MatMult_SeqBAIJ_2; 59 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 60 break; 61 case 3: 62 B->ops->mult = MatMult_SeqBAIJ_3; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 64 break; 65 case 4: 66 B->ops->mult = MatMult_SeqBAIJ_4; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 68 break; 69 case 5: 70 B->ops->mult = MatMult_SeqBAIJ_5; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 72 break; 73 case 6: 74 B->ops->mult = MatMult_SeqBAIJ_6; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 76 break; 77 case 7: 78 B->ops->mult = MatMult_SeqBAIJ_7; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 80 break; 81 case 11: 82 B->ops->mult = MatMult_SeqBAIJ_11; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 84 break; 85 case 15: 86 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 88 break; 89 default: 90 B->ops->mult = MatMult_SeqBAIJ_N; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 92 break; 93 } 94 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 95 96 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 97 * simply involves destroying the MKL sparse matrix handle and then freeing 98 * the spptr pointer. */ 99 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 100 101 if (baijmkl->sparse_optimized) { 102 sparse_status_t stat; 103 stat = mkl_sparse_destroy(baijmkl->bsrA); 104 if (stat != SPARSE_STATUS_SUCCESS) { 105 PetscFunctionReturn(PETSC_ERR_LIB); 106 } 107 } 108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 109 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 110 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 111 #endif 112 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 113 114 /* Change the type of B to MATSEQBAIJ. */ 115 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 116 117 *newmat = B; 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 122 { 123 PetscErrorCode ierr; 124 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 125 126 PetscFunctionBegin; 127 if (baijmkl) { 128 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 129 if (baijmkl->sparse_optimized) { 130 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 131 stat = mkl_sparse_destroy(baijmkl->bsrA); 132 if (stat != SPARSE_STATUS_SUCCESS) { 133 PetscFunctionReturn(PETSC_ERR_LIB); 134 } 135 } 136 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 137 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 138 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 139 #endif 140 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 141 } 142 143 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 144 * to destroy everything that remains. */ 145 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 146 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 155 PetscInt mbs, nbs, nz, bs, i; 156 MatScalar *aa; 157 PetscInt *aj,*ai; 158 sparse_status_t stat; 159 160 PetscFunctionBegin; 161 if (baijmkl->sparse_optimized) { 162 /* Matrix has been previously assembled and optimized. Must destroy old 163 * matrix handle before running the optimization step again. */ 164 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 165 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 166 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 167 #endif 168 stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat); 169 } 170 baijmkl->sparse_optimized = PETSC_FALSE; 171 172 /* Now perform the SpMV2 setup and matrix optimization. */ 173 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 174 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 175 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 176 mbs = a->mbs; 177 nbs = a->nbs; 178 nz = a->nz; 179 bs = A->rmap->bs; 180 aa = a->a; 181 182 if ((nz!=0) & !(A->structure_only)) { 183 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 184 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 185 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 186 aj = a->j; 187 ai = a->i; 188 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 189 #else 190 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 191 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 192 for (i=0;i<mbs+1;i++) 193 ai[i] = a->i[i]+1; 194 for (i=0;i<nz;i++) 195 aj[i] = a->j[i]+1; 196 aa = a->a; 197 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 198 baijmkl->ai1 = ai; 199 baijmkl->aj1 = aj; 200 #endif 201 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat); 202 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat); 203 stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat); 204 baijmkl->sparse_optimized = PETSC_TRUE; 205 } 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 210 { 211 PetscErrorCode ierr; 212 Mat_SeqBAIJMKL *baijmkl; 213 Mat_SeqBAIJMKL *baijmkl_dest; 214 215 PetscFunctionBegin; 216 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 217 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 218 ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr); 219 (*M)->spptr = (void*)baijmkl_dest; 220 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 221 baijmkl_dest->sparse_optimized = PETSC_FALSE; 222 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 227 { 228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 229 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 230 const PetscScalar *x; 231 PetscScalar *y; 232 PetscErrorCode ierr; 233 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 234 235 PetscFunctionBegin; 236 /* If there are no nonzero entries, zero yy and return immediately. */ 237 if(!a->nz) { 238 PetscInt i; 239 PetscInt m=a->mbs*A->rmap->bs; 240 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 241 for (i=0; i<m; i++) { 242 y[i] = 0.0; 243 } 244 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 249 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 250 251 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 252 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 253 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 254 if (!baijmkl->sparse_optimized) { 255 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 256 } 257 258 /* Call MKL SpMV2 executor routine to do the MatMult. */ 259 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 260 261 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 262 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 263 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 267 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 268 { 269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 270 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 271 const PetscScalar *x; 272 PetscScalar *y; 273 PetscErrorCode ierr; 274 sparse_status_t stat; 275 276 PetscFunctionBegin; 277 /* If there are no nonzero entries, zero yy and return immediately. */ 278 if(!a->nz) { 279 PetscInt i; 280 PetscInt n=a->nbs; 281 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 282 for (i=0; i<n; i++) { 283 y[i] = 0.0; 284 } 285 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 290 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 291 292 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 293 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 294 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 295 if (!baijmkl->sparse_optimized) { 296 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 297 } 298 299 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 300 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 301 302 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 312 const PetscScalar *x; 313 PetscScalar *y,*z; 314 PetscErrorCode ierr; 315 PetscInt m=a->mbs*A->rmap->bs; 316 PetscInt i; 317 318 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 319 320 PetscFunctionBegin; 321 /* If there are no nonzero entries, set zz = yy and return immediately. */ 322 if(!a->nz) { 323 PetscInt i; 324 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 325 for (i=0; i<m; i++) { 326 z[i] = y[i]; 327 } 328 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 332 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 333 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 334 335 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 336 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 337 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 338 if (!baijmkl->sparse_optimized) { 339 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 340 } 341 342 /* Call MKL sparse BLAS routine to do the MatMult. */ 343 if (zz == yy) { 344 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 345 * with alpha and beta both set to 1.0. */ 346 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 347 } else { 348 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 349 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 350 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 351 for (i=0; i<m; i++) { 352 z[i] += y[i]; 353 } 354 } 355 356 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 357 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 358 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 363 { 364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 365 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 366 const PetscScalar *x; 367 PetscScalar *y,*z; 368 PetscErrorCode ierr; 369 PetscInt n=a->nbs*A->rmap->bs; 370 PetscInt i; 371 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 372 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 373 374 PetscFunctionBegin; 375 /* If there are no nonzero entries, set zz = yy and return immediately. */ 376 if(!a->nz) { 377 PetscInt i; 378 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 379 for (i=0; i<n; i++) { 380 z[i] = y[i]; 381 } 382 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 387 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 388 389 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 390 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 391 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 392 if (!baijmkl->sparse_optimized) { 393 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 394 } 395 396 /* Call MKL sparse BLAS routine to do the MatMult. */ 397 if (zz == yy) { 398 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 399 * with alpha and beta both set to 1.0. */ 400 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 401 } else { 402 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 403 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 404 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 405 for (i=0; i<n; i++) { 406 z[i] += y[i]; 407 } 408 } 409 410 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 412 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 417 { 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 422 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 427 { 428 PetscErrorCode ierr; 429 430 PetscFunctionBegin; 431 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 432 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 436 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 437 { 438 PetscErrorCode ierr; 439 440 PetscFunctionBegin; 441 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 442 if (str == SAME_NONZERO_PATTERN) { 443 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 444 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 445 } 446 PetscFunctionReturn(0); 447 } 448 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 449 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 450 * routine, but can also be used to convert an assembled SeqBAIJ matrix 451 * into a SeqBAIJMKL one. */ 452 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 453 { 454 PetscErrorCode ierr; 455 Mat B = *newmat; 456 Mat_SeqBAIJMKL *baijmkl; 457 PetscBool sametype; 458 459 PetscFunctionBegin; 460 if (reuse == MAT_INITIAL_MATRIX) { 461 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 462 } 463 464 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 465 if (sametype) PetscFunctionReturn(0); 466 467 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 468 B->spptr = (void*)baijmkl; 469 470 /* Set function pointers for methods that we inherit from BAIJ but override. 471 * We also parse some command line options below, since those determine some of the methods we point to. */ 472 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 473 474 baijmkl->sparse_optimized = PETSC_FALSE; 475 476 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 477 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 478 479 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 480 *newmat = B; 481 PetscFunctionReturn(0); 482 } 483 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 484 { 485 PetscErrorCode ierr; 486 PetscFunctionBegin; 487 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 488 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 489 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 490 A->ops->destroy = MatDestroy_SeqBAIJMKL; 491 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 492 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 493 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 494 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 495 A->ops->scale = MatScale_SeqBAIJMKL; 496 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 497 A->ops->axpy = MatAXPY_SeqBAIJMKL; 498 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 499 PetscFunctionReturn(0); 500 } 501 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 502 /*@C 503 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 504 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 505 routines from Intel MKL whenever possible. 506 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 507 operations are currently supported. 508 If the installed version of MKL supports the "SpMV2" sparse 509 inspector-executor routines, then those are used by default. 510 Default PETSc kernels are used otherwise. 511 512 Input Parameters: 513 + comm - MPI communicator, set to PETSC_COMM_SELF 514 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 515 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 516 . m - number of rows 517 . n - number of columns 518 . nz - number of nonzero blocks per block row (same for all rows) 519 - nnz - array containing the number of nonzero blocks in the various block rows 520 (possibly different for each block row) or NULL 521 522 523 Output Parameter: 524 . A - the matrix 525 526 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 527 MatXXXXSetPreallocation() paradgm instead of this routine directly. 528 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 529 530 Options Database Keys: 531 . -mat_no_unroll - uses code that does not unroll the loops in the 532 block calculations (much slower) 533 . -mat_block_size - size of the blocks to use 534 535 Level: intermediate 536 537 Notes: 538 The number of rows and columns must be divisible by blocksize. 539 540 If the nnz parameter is given then the nz parameter is ignored 541 542 A nonzero block is any block that as 1 or more nonzeros in it 543 544 The block AIJ format is fully compatible with standard Fortran 77 545 storage. That is, the stored row and column indices can begin at 546 either one (as in Fortran) or zero. See the users' manual for details. 547 548 Specify the preallocated storage with either nz or nnz (not both). 549 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 550 allocation. See Users-Manual: ch_mat for details. 551 matrices. 552 553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 554 555 @*/ 556 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 557 { 558 PetscErrorCode ierr; 559 560 PetscFunctionBegin; 561 ierr = MatCreate(comm,A);CHKERRQ(ierr); 562 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 563 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 564 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 565 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 566 #else 567 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 568 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 569 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 570 #endif 571 PetscFunctionReturn(0); 572 } 573 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 574 { 575 PetscErrorCode ierr; 576 577 PetscFunctionBegin; 578 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 579 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 580 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 581 #else 582 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 583 #endif 584 PetscFunctionReturn(0); 585 } 586