1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 #include <mkl_spblas.h> 11 12 static PetscBool PetscSeqBAIJSupportsZeroBased(void) 13 { 14 static PetscBool set = PETSC_FALSE,value; 15 int n=1,ia[1],ja[1]; 16 float a[1]; 17 sparse_status_t status; 18 sparse_matrix_t A; 19 20 if (!set) { 21 status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a); 22 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 23 (void) mkl_sparse_destroy(A); 24 set = PETSC_TRUE; 25 } 26 return value; 27 } 28 29 typedef struct { 30 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 31 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 32 struct matrix_descr descr; 33 PetscInt *ai1; 34 PetscInt *aj1; 35 } Mat_SeqBAIJMKL; 36 37 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 38 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 39 40 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 41 { 42 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 43 /* so we will ignore 'MatType type'. */ 44 PetscErrorCode ierr; 45 Mat B = *newmat; 46 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 47 48 PetscFunctionBegin; 49 if (reuse == MAT_INITIAL_MATRIX) { 50 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 51 } 52 53 /* Reset the original function pointers. */ 54 B->ops->duplicate = MatDuplicate_SeqBAIJ; 55 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 56 B->ops->destroy = MatDestroy_SeqBAIJ; 57 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 58 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 59 B->ops->scale = MatScale_SeqBAIJ; 60 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 61 B->ops->axpy = MatAXPY_SeqBAIJ; 62 63 switch (A->rmap->bs) { 64 case 1: 65 B->ops->mult = MatMult_SeqBAIJ_1; 66 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 67 break; 68 case 2: 69 B->ops->mult = MatMult_SeqBAIJ_2; 70 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 71 break; 72 case 3: 73 B->ops->mult = MatMult_SeqBAIJ_3; 74 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 75 break; 76 case 4: 77 B->ops->mult = MatMult_SeqBAIJ_4; 78 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 79 break; 80 case 5: 81 B->ops->mult = MatMult_SeqBAIJ_5; 82 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 83 break; 84 case 6: 85 B->ops->mult = MatMult_SeqBAIJ_6; 86 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 87 break; 88 case 7: 89 B->ops->mult = MatMult_SeqBAIJ_7; 90 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 91 break; 92 case 11: 93 B->ops->mult = MatMult_SeqBAIJ_11; 94 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 95 break; 96 case 15: 97 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 98 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 99 break; 100 default: 101 B->ops->mult = MatMult_SeqBAIJ_N; 102 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 103 break; 104 } 105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 106 107 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 108 * simply involves destroying the MKL sparse matrix handle and then freeing 109 * the spptr pointer. */ 110 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 111 112 if (baijmkl->sparse_optimized) { 113 sparse_status_t stat; 114 stat = mkl_sparse_destroy(baijmkl->bsrA); 115 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 116 } 117 ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr); 118 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 119 120 /* Change the type of B to MATSEQBAIJ. */ 121 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 122 123 *newmat = B; 124 PetscFunctionReturn(0); 125 } 126 127 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 128 { 129 PetscErrorCode ierr; 130 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 131 132 PetscFunctionBegin; 133 if (baijmkl) { 134 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 135 if (baijmkl->sparse_optimized) { 136 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 137 stat = mkl_sparse_destroy(baijmkl->bsrA); 138 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 139 } 140 ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr); 141 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 142 } 143 144 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 145 * to destroy everything that remains. */ 146 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 147 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 152 { 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 155 PetscInt mbs, nbs, nz, bs; 156 MatScalar *aa; 157 PetscInt *aj,*ai; 158 sparse_status_t stat; 159 PetscErrorCode ierr; 160 PetscInt i; 161 162 PetscFunctionBegin; 163 if (baijmkl->sparse_optimized) { 164 /* Matrix has been previously assembled and optimized. Must destroy old 165 * matrix handle before running the optimization step again. */ 166 ierr = PetscFree2(baijmkl->ai1,baijmkl->aj1);CHKERRQ(ierr); 167 stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat); 168 } 169 baijmkl->sparse_optimized = PETSC_FALSE; 170 171 /* Now perform the SpMV2 setup and matrix optimization. */ 172 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 173 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 174 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 175 mbs = a->mbs; 176 nbs = a->nbs; 177 nz = a->nz; 178 bs = A->rmap->bs; 179 aa = a->a; 180 181 if ((nz!=0) & !(A->structure_only)) { 182 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 183 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 184 if (PetscSeqBAIJSupportsZeroBased()) { 185 aj = a->j; 186 ai = a->i; 187 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 188 } else { 189 ierr = PetscMalloc2(mbs+1,&ai,nz,&aj);CHKERRQ(ierr); 190 for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1; 191 for (i=0;i<nz;i++) aj[i] = a->j[i]+1; 192 aa = a->a; 193 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 194 baijmkl->ai1 = ai; 195 baijmkl->aj1 = aj; 196 } 197 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat); 198 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat); 199 stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat); 200 baijmkl->sparse_optimized = PETSC_TRUE; 201 } 202 PetscFunctionReturn(0); 203 } 204 205 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 206 { 207 PetscErrorCode ierr; 208 Mat_SeqBAIJMKL *baijmkl; 209 Mat_SeqBAIJMKL *baijmkl_dest; 210 211 PetscFunctionBegin; 212 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 213 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 214 ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr); 215 (*M)->spptr = (void*)baijmkl_dest; 216 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 217 baijmkl_dest->sparse_optimized = PETSC_FALSE; 218 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 223 { 224 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 225 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 226 const PetscScalar *x; 227 PetscScalar *y; 228 PetscErrorCode ierr; 229 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 230 231 PetscFunctionBegin; 232 /* If there are no nonzero entries, zero yy and return immediately. */ 233 if (!a->nz) { 234 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 239 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 240 241 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 242 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 243 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 244 if (!baijmkl->sparse_optimized) { 245 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 246 } 247 248 /* Call MKL SpMV2 executor routine to do the MatMult. */ 249 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 250 251 ierr = PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);CHKERRQ(ierr); 252 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 253 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 254 PetscFunctionReturn(0); 255 } 256 257 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 258 { 259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 260 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 261 const PetscScalar *x; 262 PetscScalar *y; 263 PetscErrorCode ierr; 264 sparse_status_t stat; 265 266 PetscFunctionBegin; 267 /* If there are no nonzero entries, zero yy and return immediately. */ 268 if(!a->nz) { 269 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 275 276 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 277 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 278 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 279 if (!baijmkl->sparse_optimized) { 280 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 281 } 282 283 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 284 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 285 286 ierr = PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);CHKERRQ(ierr); 287 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 293 { 294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 295 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 296 const PetscScalar *x; 297 PetscScalar *y,*z; 298 PetscErrorCode ierr; 299 PetscInt m=a->mbs*A->rmap->bs; 300 PetscInt i; 301 302 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 303 304 PetscFunctionBegin; 305 /* If there are no nonzero entries, set zz = yy and return immediately. */ 306 if (!a->nz) { 307 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 312 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 313 314 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 315 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 316 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 317 if (!baijmkl->sparse_optimized) { 318 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 319 } 320 321 /* Call MKL sparse BLAS routine to do the MatMult. */ 322 if (zz == yy) { 323 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 324 * with alpha and beta both set to 1.0. */ 325 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 326 } else { 327 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 328 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 329 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 330 for (i=0; i<m; i++) { 331 z[i] += y[i]; 332 } 333 } 334 335 ierr = PetscLogFlops(2.0*a->bs2*a->nz);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 342 { 343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 344 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 345 const PetscScalar *x; 346 PetscScalar *y,*z; 347 PetscErrorCode ierr; 348 PetscInt n=a->nbs*A->rmap->bs; 349 PetscInt i; 350 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 351 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 352 353 PetscFunctionBegin; 354 /* If there are no nonzero entries, set zz = yy and return immediately. */ 355 if(!a->nz) { 356 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 361 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 362 363 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 364 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 365 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 366 if (!baijmkl->sparse_optimized) { 367 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 368 } 369 370 /* Call MKL sparse BLAS routine to do the MatMult. */ 371 if (zz == yy) { 372 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 373 * with alpha and beta both set to 1.0. */ 374 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 375 } else { 376 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 377 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 378 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 379 for (i=0; i<n; i++) { 380 z[i] += y[i]; 381 } 382 } 383 384 ierr = PetscLogFlops(2.0*a->bs2*a->nz);CHKERRQ(ierr); 385 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 386 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 391 { 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 396 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 401 { 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 406 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 411 { 412 PetscErrorCode ierr; 413 414 PetscFunctionBegin; 415 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 416 if (str == SAME_NONZERO_PATTERN) { 417 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 418 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 419 } 420 PetscFunctionReturn(0); 421 } 422 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 423 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 424 * routine, but can also be used to convert an assembled SeqBAIJ matrix 425 * into a SeqBAIJMKL one. */ 426 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 427 { 428 PetscErrorCode ierr; 429 Mat B = *newmat; 430 Mat_SeqBAIJMKL *baijmkl; 431 PetscBool sametype; 432 433 PetscFunctionBegin; 434 if (reuse == MAT_INITIAL_MATRIX) { 435 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 436 } 437 438 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 439 if (sametype) PetscFunctionReturn(0); 440 441 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 442 B->spptr = (void*)baijmkl; 443 444 /* Set function pointers for methods that we inherit from BAIJ but override. 445 * We also parse some command line options below, since those determine some of the methods we point to. */ 446 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 447 448 baijmkl->sparse_optimized = PETSC_FALSE; 449 450 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 451 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 452 453 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 454 *newmat = B; 455 PetscFunctionReturn(0); 456 } 457 458 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 459 { 460 PetscErrorCode ierr; 461 462 PetscFunctionBegin; 463 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 464 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 465 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 466 A->ops->destroy = MatDestroy_SeqBAIJMKL; 467 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 468 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 469 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 470 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 471 A->ops->scale = MatScale_SeqBAIJMKL; 472 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 473 A->ops->axpy = MatAXPY_SeqBAIJMKL; 474 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 475 PetscFunctionReturn(0); 476 } 477 478 /*@C 479 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 480 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 481 routines from Intel MKL whenever possible. 482 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 483 operations are currently supported. 484 If the installed version of MKL supports the "SpMV2" sparse 485 inspector-executor routines, then those are used by default. 486 Default PETSc kernels are used otherwise. 487 488 Input Parameters: 489 + comm - MPI communicator, set to PETSC_COMM_SELF 490 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 491 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 492 . m - number of rows 493 . n - number of columns 494 . nz - number of nonzero blocks per block row (same for all rows) 495 - nnz - array containing the number of nonzero blocks in the various block rows 496 (possibly different for each block row) or NULL 497 498 499 Output Parameter: 500 . A - the matrix 501 502 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 503 MatXXXXSetPreallocation() paradigm instead of this routine directly. 504 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 505 506 Options Database Keys: 507 . -mat_no_unroll - uses code that does not unroll the loops in the 508 block calculations (much slower) 509 . -mat_block_size - size of the blocks to use 510 511 Level: intermediate 512 513 Notes: 514 The number of rows and columns must be divisible by blocksize. 515 516 If the nnz parameter is given then the nz parameter is ignored 517 518 A nonzero block is any block that as 1 or more nonzeros in it 519 520 The block AIJ format is fully compatible with standard Fortran 77 521 storage. That is, the stored row and column indices can begin at 522 either one (as in Fortran) or zero. See the users' manual for details. 523 524 Specify the preallocated storage with either nz or nnz (not both). 525 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 526 allocation. See Users-Manual: ch_mat for details. 527 matrices. 528 529 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 530 531 @*/ 532 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 533 { 534 PetscErrorCode ierr; 535 536 PetscFunctionBegin; 537 ierr = MatCreate(comm,A);CHKERRQ(ierr); 538 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 539 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 540 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 545 { 546 PetscErrorCode ierr; 547 548 PetscFunctionBegin; 549 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 550 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 551 PetscFunctionReturn(0); 552 } 553