1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL verion is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 16 typedef struct { 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 21 PetscInt *ai1; 22 PetscInt *aj1; 23 #endif 24 } Mat_SeqBAIJMKL; 25 26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30 { 31 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 PetscErrorCode ierr; 34 Mat B = *newmat; 35 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqBAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 45 B->ops->destroy = MatDestroy_SeqBAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 48 B->ops->scale = MatScale_SeqBAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 50 B->ops->axpy = MatAXPY_SeqBAIJ; 51 52 switch (A->rmap->bs) { 53 case 1: 54 B->ops->mult = MatMult_SeqBAIJ_1; 55 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 56 break; 57 case 2: 58 B->ops->mult = MatMult_SeqBAIJ_2; 59 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 60 break; 61 case 3: 62 B->ops->mult = MatMult_SeqBAIJ_3; 63 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 64 break; 65 case 4: 66 B->ops->mult = MatMult_SeqBAIJ_4; 67 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 68 break; 69 case 5: 70 B->ops->mult = MatMult_SeqBAIJ_5; 71 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 72 break; 73 case 6: 74 B->ops->mult = MatMult_SeqBAIJ_6; 75 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 76 break; 77 case 7: 78 B->ops->mult = MatMult_SeqBAIJ_7; 79 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 80 break; 81 case 11: 82 B->ops->mult = MatMult_SeqBAIJ_11; 83 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 84 break; 85 case 15: 86 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 87 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 88 break; 89 default: 90 B->ops->mult = MatMult_SeqBAIJ_N; 91 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 92 break; 93 } 94 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr); 95 96 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 97 * simply involves destroying the MKL sparse matrix handle and then freeing 98 * the spptr pointer. */ 99 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr; 100 101 if (baijmkl->sparse_optimized) { 102 sparse_status_t stat; 103 stat = mkl_sparse_destroy(baijmkl->bsrA); 104 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 105 } 106 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 107 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 108 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 109 #endif 110 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 111 112 /* Change the type of B to MATSEQBAIJ. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr); 114 115 *newmat = B; 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 120 { 121 PetscErrorCode ierr; 122 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 123 124 PetscFunctionBegin; 125 if (baijmkl) { 126 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 127 if (baijmkl->sparse_optimized) { 128 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 129 stat = mkl_sparse_destroy(baijmkl->bsrA); 130 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 131 } 132 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 133 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 134 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 135 #endif 136 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 137 } 138 139 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 140 * to destroy everything that remains. */ 141 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr); 142 ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 147 { 148 PetscErrorCode ierr; 149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 150 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 151 PetscInt mbs, nbs, nz, bs, i; 152 MatScalar *aa; 153 PetscInt *aj,*ai; 154 sparse_status_t stat; 155 156 PetscFunctionBegin; 157 if (baijmkl->sparse_optimized) { 158 /* Matrix has been previously assembled and optimized. Must destroy old 159 * matrix handle before running the optimization step again. */ 160 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 161 ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr); 162 ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr); 163 #endif 164 stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat); 165 } 166 baijmkl->sparse_optimized = PETSC_FALSE; 167 168 /* Now perform the SpMV2 setup and matrix optimization. */ 169 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 170 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 171 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 172 mbs = a->mbs; 173 nbs = a->nbs; 174 nz = a->nz; 175 bs = A->rmap->bs; 176 aa = a->a; 177 178 if ((nz!=0) & !(A->structure_only)) { 179 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 180 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 181 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED 182 aj = a->j; 183 ai = a->i; 184 stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 185 #else 186 ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr); 187 ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr); 188 for (i=0;i<mbs+1;i++) 189 ai[i] = a->i[i]+1; 190 for (i=0;i<nz;i++) 191 aj[i] = a->j[i]+1; 192 aa = a->a; 193 stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat); 194 baijmkl->ai1 = ai; 195 baijmkl->aj1 = aj; 196 #endif 197 stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat); 198 stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat); 199 stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat); 200 baijmkl->sparse_optimized = PETSC_TRUE; 201 } 202 PetscFunctionReturn(0); 203 } 204 205 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 206 { 207 PetscErrorCode ierr; 208 Mat_SeqBAIJMKL *baijmkl; 209 Mat_SeqBAIJMKL *baijmkl_dest; 210 211 PetscFunctionBegin; 212 ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr); 213 baijmkl = (Mat_SeqBAIJMKL*) A->spptr; 214 ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr); 215 (*M)->spptr = (void*)baijmkl_dest; 216 ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr); 217 baijmkl_dest->sparse_optimized = PETSC_FALSE; 218 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 223 { 224 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 225 Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr; 226 const PetscScalar *x; 227 PetscScalar *y; 228 PetscErrorCode ierr; 229 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 230 231 PetscFunctionBegin; 232 /* If there are no nonzero entries, zero yy and return immediately. */ 233 if(!a->nz) { 234 PetscInt i; 235 PetscInt m=a->mbs*A->rmap->bs; 236 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 237 for (i=0; i<m; i++) { 238 y[i] = 0.0; 239 } 240 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 246 247 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 248 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 249 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 250 if (!baijmkl->sparse_optimized) { 251 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 252 } 253 254 /* Call MKL SpMV2 executor routine to do the MatMult. */ 255 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 256 257 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 258 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 259 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 264 { 265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 266 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 267 const PetscScalar *x; 268 PetscScalar *y; 269 PetscErrorCode ierr; 270 sparse_status_t stat; 271 272 PetscFunctionBegin; 273 /* If there are no nonzero entries, zero yy and return immediately. */ 274 if(!a->nz) { 275 PetscInt i; 276 PetscInt n=a->nbs; 277 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 278 for (i=0; i<n; i++) { 279 y[i] = 0.0; 280 } 281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 286 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 287 288 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 289 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 290 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 291 if (!baijmkl->sparse_optimized) { 292 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 293 } 294 295 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 296 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat); 297 298 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 299 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 300 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 305 { 306 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 307 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 308 const PetscScalar *x; 309 PetscScalar *y,*z; 310 PetscErrorCode ierr; 311 PetscInt m=a->mbs*A->rmap->bs; 312 PetscInt i; 313 314 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 315 316 PetscFunctionBegin; 317 /* If there are no nonzero entries, set zz = yy and return immediately. */ 318 if(!a->nz) { 319 PetscInt i; 320 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 321 for (i=0; i<m; i++) { 322 z[i] = y[i]; 323 } 324 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 329 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 330 331 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 332 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 333 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 334 if (!baijmkl->sparse_optimized) { 335 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 336 } 337 338 /* Call MKL sparse BLAS routine to do the MatMult. */ 339 if (zz == yy) { 340 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 341 * with alpha and beta both set to 1.0. */ 342 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 343 } else { 344 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 345 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 346 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 347 for (i=0; i<m; i++) { 348 z[i] += y[i]; 349 } 350 } 351 352 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 354 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 361 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr; 362 const PetscScalar *x; 363 PetscScalar *y,*z; 364 PetscErrorCode ierr; 365 PetscInt n=a->nbs*A->rmap->bs; 366 PetscInt i; 367 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 368 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 369 370 PetscFunctionBegin; 371 /* If there are no nonzero entries, set zz = yy and return immediately. */ 372 if(!a->nz) { 373 PetscInt i; 374 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 375 for (i=0; i<n; i++) { 376 z[i] = y[i]; 377 } 378 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 384 385 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 386 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 387 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 388 if (!baijmkl->sparse_optimized) { 389 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 390 } 391 392 /* Call MKL sparse BLAS routine to do the MatMult. */ 393 if (zz == yy) { 394 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 395 * with alpha and beta both set to 1.0. */ 396 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat); 397 } else { 398 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 399 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 400 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat); 401 for (i=0; i<n; i++) { 402 z[i] += y[i]; 403 } 404 } 405 406 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha) 413 { 414 PetscErrorCode ierr; 415 416 PetscFunctionBegin; 417 ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr); 418 ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr) 423 { 424 PetscErrorCode ierr; 425 426 PetscFunctionBegin; 427 ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr); 428 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 433 { 434 PetscErrorCode ierr; 435 436 PetscFunctionBegin; 437 ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr); 438 if (str == SAME_NONZERO_PATTERN) { 439 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 440 ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 441 } 442 PetscFunctionReturn(0); 443 } 444 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 445 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 446 * routine, but can also be used to convert an assembled SeqBAIJ matrix 447 * into a SeqBAIJMKL one. */ 448 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 449 { 450 PetscErrorCode ierr; 451 Mat B = *newmat; 452 Mat_SeqBAIJMKL *baijmkl; 453 PetscBool sametype; 454 455 PetscFunctionBegin; 456 if (reuse == MAT_INITIAL_MATRIX) { 457 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 458 } 459 460 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 461 if (sametype) PetscFunctionReturn(0); 462 463 ierr = PetscNewLog(B,&baijmkl);CHKERRQ(ierr); 464 B->spptr = (void*)baijmkl; 465 466 /* Set function pointers for methods that we inherit from BAIJ but override. 467 * We also parse some command line options below, since those determine some of the methods we point to. */ 468 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 469 470 baijmkl->sparse_optimized = PETSC_FALSE; 471 472 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr); 473 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr); 474 475 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr); 476 *newmat = B; 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 481 { 482 PetscErrorCode ierr; 483 484 PetscFunctionBegin; 485 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 486 ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr); 487 ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 488 A->ops->destroy = MatDestroy_SeqBAIJMKL; 489 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 490 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 491 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 492 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 493 A->ops->scale = MatScale_SeqBAIJMKL; 494 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 495 A->ops->axpy = MatAXPY_SeqBAIJMKL; 496 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 497 PetscFunctionReturn(0); 498 } 499 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 500 501 /*@C 502 MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL. 503 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 504 routines from Intel MKL whenever possible. 505 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 506 operations are currently supported. 507 If the installed version of MKL supports the "SpMV2" sparse 508 inspector-executor routines, then those are used by default. 509 Default PETSc kernels are used otherwise. 510 511 Input Parameters: 512 + comm - MPI communicator, set to PETSC_COMM_SELF 513 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 514 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 515 . m - number of rows 516 . n - number of columns 517 . nz - number of nonzero blocks per block row (same for all rows) 518 - nnz - array containing the number of nonzero blocks in the various block rows 519 (possibly different for each block row) or NULL 520 521 522 Output Parameter: 523 . A - the matrix 524 525 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 526 MatXXXXSetPreallocation() paradgm instead of this routine directly. 527 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 528 529 Options Database Keys: 530 . -mat_no_unroll - uses code that does not unroll the loops in the 531 block calculations (much slower) 532 . -mat_block_size - size of the blocks to use 533 534 Level: intermediate 535 536 Notes: 537 The number of rows and columns must be divisible by blocksize. 538 539 If the nnz parameter is given then the nz parameter is ignored 540 541 A nonzero block is any block that as 1 or more nonzeros in it 542 543 The block AIJ format is fully compatible with standard Fortran 77 544 storage. That is, the stored row and column indices can begin at 545 either one (as in Fortran) or zero. See the users' manual for details. 546 547 Specify the preallocated storage with either nz or nnz (not both). 548 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 549 allocation. See Users-Manual: ch_mat for details. 550 matrices. 551 552 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 553 554 @*/ 555 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 556 { 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 ierr = MatCreate(comm,A);CHKERRQ(ierr); 561 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 562 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 563 ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 564 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 565 #else 566 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 567 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 568 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 569 #endif 570 PetscFunctionReturn(0); 571 } 572 573 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 574 { 575 PetscErrorCode ierr; 576 577 PetscFunctionBegin; 578 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 579 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 580 ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 581 #else 582 ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 583 #endif 584 PetscFunctionReturn(0); 585 } 586