1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30 { 31 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 PetscErrorCode ierr; 34 Mat B = *newmat; 35 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqBAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 45 B->ops->destroy = MatDestroy_SeqBAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 48 B->ops->scale = MatScale_SeqBAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 50 B->ops->axpy = MatAXPY_SeqBAIJ; 51 52 switch (A->rmap->bs) { 53 case 1: 54 B->ops->mult = MatMult_SeqBAIJ_1; 55 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 56 break; 57 case 2: 58 B->ops->mult = MatMult_SeqBAIJ_2; 59 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 60 break; 61 case 3: 62 B->ops->mult = MatMult_SeqBAIJ_3; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 64 break; 65 case 4: 66 B->ops->mult = MatMult_SeqBAIJ_4; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 68 break; 69 case 5: 70 B->ops->mult = MatMult_SeqBAIJ_5; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 72 break; 73 case 6: 74 B->ops->mult = MatMult_SeqBAIJ_6; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 76 break; 77 case 7: 78 B->ops->mult = MatMult_SeqBAIJ_7; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 80 break; 81 case 11: 82 B->ops->mult = MatMult_SeqBAIJ_11; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 84 break; 85 case 15: 86 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 88 break; 89 default: 90 B->ops->mult = MatMult_SeqBAIJ_N; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 92 break; 93 } 94 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 95 96 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 97 * simply involves destroying the MKL sparse matrix handle and then freeing 98 * the spptr pointer. */ 99 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 100 101 if (baijmkl->sparse_optimized) { 102 sparse_status_t stat; 103 stat = mkl_sparse_destroy(baijmkl->bsrA); 104 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 105 } 106 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 107 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 108 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 109 #endif 110 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 111 112 /* Change the type of B to MATSEQBAIJ. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 114 115 *newmat = B; 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 120 { 121 PetscErrorCode ierr; 122 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 123 124 PetscFunctionBegin; 125 if (baijmkl) { 126 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 127 if (baijmkl->sparse_optimized) { 128 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 129 stat = mkl_sparse_destroy(baijmkl->bsrA); 130 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 131 } 132 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 133 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 134 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 135 #endif 136 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 137 } 138 139 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 140 * to destroy everything that remains. */ 141 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 142 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 147 { 148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 149 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 150 PetscInt mbs, nbs, nz, bs; 151 MatScalar *aa; 152 PetscInt *aj,*ai; 153 sparse_status_t stat; 154 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 155 PetscErrorCode ierr; 156 PetscInt i; 157 #endif 158 159 PetscFunctionBegin; 160 if (baijmkl->sparse_optimized) { 161 /* Matrix has been previously assembled and optimized. Must destroy old 162 * matrix handle before running the optimization step again. */ 163 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 164 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 165 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 166 #endif 167 stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat); 168 } 169 baijmkl->sparse_optimized = PETSC_FALSE; 170 171 /* Now perform the SpMV2 setup and matrix optimization. */ 172 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 173 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 174 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 175 mbs = a->mbs; 176 nbs = a->nbs; 177 nz = a->nz; 178 bs = A->rmap->bs; 179 aa = a->a; 180 181 if ((nz!=0) & !(A->structure_only)) { 182 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 183 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 184 #if defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED) 185 aj = a->j; 186 ai = a->i; 187 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 188 #else 189 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 190 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 191 for (i=0;i<mbs+1;i++) 192 ai[i] = a->i[i]+1; 193 for (i=0;i<nz;i++) 194 aj[i] = a->j[i]+1; 195 aa = a->a; 196 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 197 baijmkl->ai1 = ai; 198 baijmkl->aj1 = aj; 199 #endif 200 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat); 201 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat); 202 stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat); 203 baijmkl->sparse_optimized = PETSC_TRUE; 204 } 205 PetscFunctionReturn(0); 206 } 207 208 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 209 { 210 PetscErrorCode ierr; 211 Mat_SeqBAIJMKL *baijmkl; 212 Mat_SeqBAIJMKL *baijmkl_dest; 213 214 PetscFunctionBegin; 215 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 216 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 217 ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr); 218 (*M)->spptr = (void*)baijmkl_dest; 219 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 220 baijmkl_dest->sparse_optimized = PETSC_FALSE; 221 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 226 { 227 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 228 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 229 const PetscScalar *x; 230 PetscScalar *y; 231 PetscErrorCode ierr; 232 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 233 234 PetscFunctionBegin; 235 /* If there are no nonzero entries, zero yy and return immediately. */ 236 if(!a->nz) { 237 PetscInt i; 238 PetscInt m=a->mbs*A->rmap->bs; 239 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 240 for (i=0; i<m; i++) { 241 y[i] = 0.0; 242 } 243 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 248 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 249 250 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 251 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 252 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 253 if (!baijmkl->sparse_optimized) { 254 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 255 } 256 257 /* Call MKL SpMV2 executor routine to do the MatMult. */ 258 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 259 260 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 261 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 262 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 267 { 268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 269 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 270 const PetscScalar *x; 271 PetscScalar *y; 272 PetscErrorCode ierr; 273 sparse_status_t stat; 274 275 PetscFunctionBegin; 276 /* If there are no nonzero entries, zero yy and return immediately. */ 277 if(!a->nz) { 278 PetscInt i; 279 PetscInt n=a->nbs; 280 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 281 for (i=0; i<n; i++) { 282 y[i] = 0.0; 283 } 284 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 289 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 290 291 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 292 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 293 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 294 if (!baijmkl->sparse_optimized) { 295 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 296 } 297 298 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 299 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 300 301 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 303 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 308 { 309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 310 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 311 const PetscScalar *x; 312 PetscScalar *y,*z; 313 PetscErrorCode ierr; 314 PetscInt m=a->mbs*A->rmap->bs; 315 PetscInt i; 316 317 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 318 319 PetscFunctionBegin; 320 /* If there are no nonzero entries, set zz = yy and return immediately. */ 321 if(!a->nz) { 322 PetscInt i; 323 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 324 for (i=0; i<m; i++) { 325 z[i] = y[i]; 326 } 327 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 332 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 333 334 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 335 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 336 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 337 if (!baijmkl->sparse_optimized) { 338 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 339 } 340 341 /* Call MKL sparse BLAS routine to do the MatMult. */ 342 if (zz == yy) { 343 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 344 * with alpha and beta both set to 1.0. */ 345 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 346 } else { 347 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 348 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 349 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 350 for (i=0; i<m; i++) { 351 z[i] += y[i]; 352 } 353 } 354 355 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 356 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 358 PetscFunctionReturn(0); 359 } 360 361 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 365 const PetscScalar *x; 366 PetscScalar *y,*z; 367 PetscErrorCode ierr; 368 PetscInt n=a->nbs*A->rmap->bs; 369 PetscInt i; 370 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 371 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 372 373 PetscFunctionBegin; 374 /* If there are no nonzero entries, set zz = yy and return immediately. */ 375 if(!a->nz) { 376 PetscInt i; 377 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 378 for (i=0; i<n; i++) { 379 z[i] = y[i]; 380 } 381 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 386 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 387 388 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 389 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 390 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 391 if (!baijmkl->sparse_optimized) { 392 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 393 } 394 395 /* Call MKL sparse BLAS routine to do the MatMult. */ 396 if (zz == yy) { 397 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 398 * with alpha and beta both set to 1.0. */ 399 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 400 } else { 401 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 402 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 403 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 404 for (i=0; i<n; i++) { 405 z[i] += y[i]; 406 } 407 } 408 409 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 416 { 417 PetscErrorCode ierr; 418 419 PetscFunctionBegin; 420 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 421 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 426 { 427 PetscErrorCode ierr; 428 429 PetscFunctionBegin; 430 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 431 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 436 { 437 PetscErrorCode ierr; 438 439 PetscFunctionBegin; 440 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 441 if (str == SAME_NONZERO_PATTERN) { 442 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 443 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 444 } 445 PetscFunctionReturn(0); 446 } 447 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 448 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 449 * routine, but can also be used to convert an assembled SeqBAIJ matrix 450 * into a SeqBAIJMKL one. */ 451 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 452 { 453 PetscErrorCode ierr; 454 Mat B = *newmat; 455 Mat_SeqBAIJMKL *baijmkl; 456 PetscBool sametype; 457 458 PetscFunctionBegin; 459 if (reuse == MAT_INITIAL_MATRIX) { 460 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 461 } 462 463 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 464 if (sametype) PetscFunctionReturn(0); 465 466 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 467 B->spptr = (void*)baijmkl; 468 469 /* Set function pointers for methods that we inherit from BAIJ but override. 470 * We also parse some command line options below, since those determine some of the methods we point to. */ 471 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 472 473 baijmkl->sparse_optimized = PETSC_FALSE; 474 475 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 476 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 477 478 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 479 *newmat = B; 480 PetscFunctionReturn(0); 481 } 482 483 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 484 { 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 489 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 490 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 491 A->ops->destroy = MatDestroy_SeqBAIJMKL; 492 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 493 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 494 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 495 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 496 A->ops->scale = MatScale_SeqBAIJMKL; 497 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 498 A->ops->axpy = MatAXPY_SeqBAIJMKL; 499 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 500 PetscFunctionReturn(0); 501 } 502 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 503 504 /*@C 505 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 506 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 507 routines from Intel MKL whenever possible. 508 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 509 operations are currently supported. 510 If the installed version of MKL supports the "SpMV2" sparse 511 inspector-executor routines, then those are used by default. 512 Default PETSc kernels are used otherwise. 513 514 Input Parameters: 515 + comm - MPI communicator, set to PETSC_COMM_SELF 516 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 517 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 518 . m - number of rows 519 . n - number of columns 520 . nz - number of nonzero blocks per block row (same for all rows) 521 - nnz - array containing the number of nonzero blocks in the various block rows 522 (possibly different for each block row) or NULL 523 524 525 Output Parameter: 526 . A - the matrix 527 528 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 529 MatXXXXSetPreallocation() paradigm instead of this routine directly. 530 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 531 532 Options Database Keys: 533 . -mat_no_unroll - uses code that does not unroll the loops in the 534 block calculations (much slower) 535 . -mat_block_size - size of the blocks to use 536 537 Level: intermediate 538 539 Notes: 540 The number of rows and columns must be divisible by blocksize. 541 542 If the nnz parameter is given then the nz parameter is ignored 543 544 A nonzero block is any block that as 1 or more nonzeros in it 545 546 The block AIJ format is fully compatible with standard Fortran 77 547 storage. That is, the stored row and column indices can begin at 548 either one (as in Fortran) or zero. See the users' manual for details. 549 550 Specify the preallocated storage with either nz or nnz (not both). 551 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 552 allocation. See Users-Manual: ch_mat for details. 553 matrices. 554 555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 556 557 @*/ 558 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 559 { 560 PetscErrorCode ierr; 561 562 PetscFunctionBegin; 563 ierr = MatCreate(comm,A);CHKERRQ(ierr); 564 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 565 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 566 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 567 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 568 #else 569 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 570 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 571 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 572 #endif 573 PetscFunctionReturn(0); 574 } 575 576 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 577 { 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 582 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 583 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 584 #else 585 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 586 #endif 587 PetscFunctionReturn(0); 588 } 589