1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 33 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 } 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 44 B->ops->destroy = MatDestroy_SeqAIJ; 45 B->ops->mult = MatMult_SeqAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 47 B->ops->multadd = MatMultAdd_SeqAIJ; 48 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49 B->ops->scale = MatScale_SeqAIJ; 50 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 51 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 52 B->ops->axpy = MatAXPY_SeqAIJ; 53 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 59 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 60 * simply involves destroying the MKL sparse matrix handle and then freeing 61 * the spptr pointer. */ 62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 63 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 64 65 if (aijmkl->sparse_optimized) { 66 sparse_status_t stat; 67 stat = mkl_sparse_destroy(aijmkl->csrA); 68 if (stat != SPARSE_STATUS_SUCCESS) { 69 PetscFunctionReturn(PETSC_ERR_LIB); 70 } 71 } 72 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 73 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 74 75 /* Change the type of B to MATSEQAIJ. */ 76 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 77 78 *newmat = B; 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 83 { 84 PetscErrorCode ierr; 85 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 86 87 PetscFunctionBegin; 88 89 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 90 * spptr pointer. */ 91 if (aijmkl) { 92 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 93 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 94 if (aijmkl->sparse_optimized) { 95 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 96 stat = mkl_sparse_destroy(aijmkl->csrA); 97 if (stat != SPARSE_STATUS_SUCCESS) { 98 PetscFunctionReturn(PETSC_ERR_LIB); 99 } 100 } 101 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 102 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 103 } 104 105 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 106 * to destroy everything that remains. */ 107 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 108 /* Note that I don't call MatSetType(). I believe this is because that 109 * is only to be called when *building* a matrix. I could be wrong, but 110 * that is how things work for the SuperLU matrix class. */ 111 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 116 { 117 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 118 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 119 * does nothing. We make it callable anyway in this case because it cuts 120 * down on littering the code with #ifdefs. */ 121 PetscFunctionReturn(0); 122 #else 123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 124 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 125 PetscInt m,n; 126 MatScalar *aa; 127 PetscInt *aj,*ai; 128 sparse_status_t stat; 129 130 PetscFunctionBegin; 131 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 132 133 if (aijmkl->sparse_optimized) { 134 /* Matrix has been previously assembled and optimized. Must destroy old 135 * matrix handle before running the optimization step again. */ 136 stat = mkl_sparse_destroy(aijmkl->csrA); 137 if (stat != SPARSE_STATUS_SUCCESS) { 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 } 141 aijmkl->sparse_optimized = PETSC_FALSE; 142 143 /* Now perform the SpMV2 setup and matrix optimization. */ 144 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 147 m = A->rmap->n; 148 n = A->cmap->n; 149 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150 aa = a->a; /* Nonzero elements stored row-by-row. */ 151 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 152 if ((a->nz!=0) & !(A->structure_only)) { 153 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 154 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 155 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158 stat = mkl_sparse_optimize(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) { 160 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161 PetscFunctionReturn(PETSC_ERR_LIB); 162 } 163 aijmkl->sparse_optimized = PETSC_TRUE; 164 } 165 166 PetscFunctionReturn(0); 167 #endif 168 } 169 170 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqAIJMKL *aijmkl; 174 Mat_SeqAIJMKL *aijmkl_dest; 175 176 PetscFunctionBegin; 177 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 178 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 179 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 180 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 181 aijmkl_dest->sparse_optimized = PETSC_FALSE; 182 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 187 { 188 PetscErrorCode ierr; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 191 PetscFunctionBegin; 192 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 193 194 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 195 * extra information and some different methods, call the AssemblyEnd 196 * routine for a MATSEQAIJ. 197 * I'm not sure if this is the best way to do this, but it avoids 198 * a lot of code duplication. */ 199 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 200 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 201 202 /* Now create the MKL sparse handle (if needed; the function checks). */ 203 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 204 205 PetscFunctionReturn(0); 206 } 207 208 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 209 { 210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 211 const PetscScalar *x; 212 PetscScalar *y; 213 const MatScalar *aa; 214 PetscErrorCode ierr; 215 PetscInt m=A->rmap->n; 216 PetscInt n=A->cmap->n; 217 PetscScalar alpha = 1.0; 218 PetscScalar beta = 0.0; 219 const PetscInt *aj,*ai; 220 char matdescra[6]; 221 222 223 /* Variables not in MatMult_SeqAIJ. */ 224 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 225 226 PetscFunctionBegin; 227 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 228 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 229 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 230 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 232 aa = a->a; /* Nonzero elements stored row-by-row. */ 233 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 234 235 /* Call MKL sparse BLAS routine to do the MatMult. */ 236 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 237 238 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 239 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 240 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 245 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 246 { 247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 249 const PetscScalar *x; 250 PetscScalar *y; 251 PetscErrorCode ierr; 252 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 253 254 PetscFunctionBegin; 255 256 /* If there are no nonzero entries, zero yy and return immediately. */ 257 if(!a->nz) { 258 PetscInt i; 259 PetscInt m=A->rmap->n; 260 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 261 for (i=0; i<m; i++) { 262 y[i] = 0.0; 263 } 264 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 269 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 270 271 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 272 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 273 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 274 if (!aijmkl->sparse_optimized) { 275 MatSeqAIJMKL_create_mkl_handle(A); 276 } 277 278 /* Call MKL SpMV2 executor routine to do the MatMult. */ 279 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 280 281 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 282 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 283 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 284 if (stat != SPARSE_STATUS_SUCCESS) { 285 PetscFunctionReturn(PETSC_ERR_LIB); 286 } 287 PetscFunctionReturn(0); 288 } 289 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 290 291 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 292 { 293 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 294 const PetscScalar *x; 295 PetscScalar *y; 296 const MatScalar *aa; 297 PetscErrorCode ierr; 298 PetscInt m=A->rmap->n; 299 PetscInt n=A->cmap->n; 300 PetscScalar alpha = 1.0; 301 PetscScalar beta = 0.0; 302 const PetscInt *aj,*ai; 303 char matdescra[6]; 304 305 /* Variables not in MatMultTranspose_SeqAIJ. */ 306 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 307 308 PetscFunctionBegin; 309 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 310 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 311 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 312 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 313 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 314 aa = a->a; /* Nonzero elements stored row-by-row. */ 315 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 316 317 /* Call MKL sparse BLAS routine to do the MatMult. */ 318 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 319 320 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 321 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 322 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 323 PetscFunctionReturn(0); 324 } 325 326 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 327 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 328 { 329 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 330 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 331 const PetscScalar *x; 332 PetscScalar *y; 333 PetscErrorCode ierr; 334 sparse_status_t stat; 335 336 PetscFunctionBegin; 337 338 /* If there are no nonzero entries, zero yy and return immediately. */ 339 if(!a->nz) { 340 PetscInt i; 341 PetscInt n=A->cmap->n; 342 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 343 for (i=0; i<n; i++) { 344 y[i] = 0.0; 345 } 346 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 352 353 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 354 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 355 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 356 if (!aijmkl->sparse_optimized) { 357 MatSeqAIJMKL_create_mkl_handle(A); 358 } 359 360 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 361 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 362 363 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 365 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 366 if (stat != SPARSE_STATUS_SUCCESS) { 367 PetscFunctionReturn(PETSC_ERR_LIB); 368 } 369 PetscFunctionReturn(0); 370 } 371 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 372 373 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 374 { 375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 376 const PetscScalar *x; 377 PetscScalar *y,*z; 378 const MatScalar *aa; 379 PetscErrorCode ierr; 380 PetscInt m=A->rmap->n; 381 PetscInt n=A->cmap->n; 382 const PetscInt *aj,*ai; 383 PetscInt i; 384 385 /* Variables not in MatMultAdd_SeqAIJ. */ 386 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 387 PetscScalar alpha = 1.0; 388 PetscScalar beta; 389 char matdescra[6]; 390 391 PetscFunctionBegin; 392 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 393 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 394 395 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 397 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 398 aa = a->a; /* Nonzero elements stored row-by-row. */ 399 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 400 401 /* Call MKL sparse BLAS routine to do the MatMult. */ 402 if (zz == yy) { 403 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 404 beta = 1.0; 405 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 406 } else { 407 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 408 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 409 beta = 0.0; 410 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 411 for (i=0; i<m; i++) { 412 z[i] += y[i]; 413 } 414 } 415 416 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 418 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 423 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 424 { 425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 426 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 427 const PetscScalar *x; 428 PetscScalar *y,*z; 429 PetscErrorCode ierr; 430 PetscInt m=A->rmap->n; 431 PetscInt i; 432 433 /* Variables not in MatMultAdd_SeqAIJ. */ 434 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 435 436 PetscFunctionBegin; 437 438 /* If there are no nonzero entries, set zz = yy and return immediately. */ 439 if(!a->nz) { 440 PetscInt i; 441 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 442 for (i=0; i<m; i++) { 443 z[i] = y[i]; 444 } 445 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 451 452 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 453 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 454 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 455 if (!aijmkl->sparse_optimized) { 456 MatSeqAIJMKL_create_mkl_handle(A); 457 } 458 459 /* Call MKL sparse BLAS routine to do the MatMult. */ 460 if (zz == yy) { 461 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 462 * with alpha and beta both set to 1.0. */ 463 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 464 } else { 465 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 466 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 467 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 468 for (i=0; i<m; i++) { 469 z[i] += y[i]; 470 } 471 } 472 473 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 475 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 476 if (stat != SPARSE_STATUS_SUCCESS) { 477 PetscFunctionReturn(PETSC_ERR_LIB); 478 } 479 PetscFunctionReturn(0); 480 } 481 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 482 483 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 484 { 485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 486 const PetscScalar *x; 487 PetscScalar *y,*z; 488 const MatScalar *aa; 489 PetscErrorCode ierr; 490 PetscInt m=A->rmap->n; 491 PetscInt n=A->cmap->n; 492 const PetscInt *aj,*ai; 493 PetscInt i; 494 495 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 496 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 497 PetscScalar alpha = 1.0; 498 PetscScalar beta; 499 char matdescra[6]; 500 501 PetscFunctionBegin; 502 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 503 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 504 505 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 507 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 508 aa = a->a; /* Nonzero elements stored row-by-row. */ 509 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 510 511 /* Call MKL sparse BLAS routine to do the MatMult. */ 512 if (zz == yy) { 513 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 514 beta = 1.0; 515 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 516 } else { 517 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 518 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 519 beta = 0.0; 520 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 521 for (i=0; i<n; i++) { 522 z[i] += y[i]; 523 } 524 } 525 526 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 527 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 533 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 534 { 535 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 536 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 537 const PetscScalar *x; 538 PetscScalar *y,*z; 539 PetscErrorCode ierr; 540 PetscInt n=A->cmap->n; 541 PetscInt i; 542 543 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 544 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 545 546 PetscFunctionBegin; 547 548 /* If there are no nonzero entries, set zz = yy and return immediately. */ 549 if(!a->nz) { 550 PetscInt i; 551 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 552 for (i=0; i<n; i++) { 553 z[i] = y[i]; 554 } 555 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 560 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 561 562 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 563 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 564 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 565 if (!aijmkl->sparse_optimized) { 566 MatSeqAIJMKL_create_mkl_handle(A); 567 } 568 569 /* Call MKL sparse BLAS routine to do the MatMult. */ 570 if (zz == yy) { 571 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 572 * with alpha and beta both set to 1.0. */ 573 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 574 } else { 575 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 576 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 577 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 578 for (i=0; i<n; i++) { 579 z[i] += y[i]; 580 } 581 } 582 583 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 585 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 586 if (stat != SPARSE_STATUS_SUCCESS) { 587 PetscFunctionReturn(PETSC_ERR_LIB); 588 } 589 PetscFunctionReturn(0); 590 } 591 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 592 593 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 594 { 595 PetscErrorCode ierr; 596 597 PetscFunctionBegin; 598 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 599 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 604 { 605 PetscErrorCode ierr; 606 607 PetscFunctionBegin; 608 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 609 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 614 { 615 PetscErrorCode ierr; 616 617 PetscFunctionBegin; 618 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 619 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 624 { 625 PetscErrorCode ierr; 626 627 PetscFunctionBegin; 628 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 629 if (str == SAME_NONZERO_PATTERN) { 630 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 631 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 632 } 633 PetscFunctionReturn(0); 634 } 635 636 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 637 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 638 * routine, but can also be used to convert an assembled SeqAIJ matrix 639 * into a SeqAIJMKL one. */ 640 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 641 { 642 PetscErrorCode ierr; 643 Mat B = *newmat; 644 Mat_SeqAIJMKL *aijmkl; 645 PetscBool set; 646 PetscBool sametype; 647 648 PetscFunctionBegin; 649 if (reuse == MAT_INITIAL_MATRIX) { 650 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 651 } 652 653 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 654 if (sametype) PetscFunctionReturn(0); 655 656 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 657 B->spptr = (void*) aijmkl; 658 659 /* Set function pointers for methods that we inherit from AIJ but override. 660 * We also parse some command line options below, since those determine some of the methods we point to. */ 661 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 662 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 663 B->ops->destroy = MatDestroy_SeqAIJMKL; 664 665 aijmkl->sparse_optimized = PETSC_FALSE; 666 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 667 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 668 #else 669 aijmkl->no_SpMV2 = PETSC_TRUE; 670 #endif 671 672 /* Parse command line options. */ 673 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 674 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 676 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 677 if(!aijmkl->no_SpMV2) { 678 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 679 aijmkl->no_SpMV2 = PETSC_TRUE; 680 } 681 #endif 682 683 if(!aijmkl->no_SpMV2) { 684 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 685 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 686 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 687 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 688 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 689 #endif 690 } else { 691 B->ops->mult = MatMult_SeqAIJMKL; 692 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 693 B->ops->multadd = MatMultAdd_SeqAIJMKL; 694 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 695 } 696 697 B->ops->scale = MatScale_SeqAIJMKL; 698 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 699 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 700 B->ops->axpy = MatAXPY_SeqAIJMKL; 701 702 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 703 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 704 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 705 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 706 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 707 708 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 709 *newmat = B; 710 PetscFunctionReturn(0); 711 } 712 713 /*@C 714 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 715 This type inherits from AIJ and is largely identical, but uses sparse BLAS 716 routines from Intel MKL whenever possible. 717 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 718 operations are currently supported. 719 If the installed version of MKL supports the "SpMV2" sparse 720 inspector-executor routines, then those are used by default. 721 722 Collective on MPI_Comm 723 724 Input Parameters: 725 + comm - MPI communicator, set to PETSC_COMM_SELF 726 . m - number of rows 727 . n - number of columns 728 . nz - number of nonzeros per row (same for all rows) 729 - nnz - array containing the number of nonzeros in the various rows 730 (possibly different for each row) or NULL 731 732 Output Parameter: 733 . A - the matrix 734 735 Options Database Keys: 736 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 737 738 Notes: 739 If nnz is given then nz is ignored 740 741 Level: intermediate 742 743 .keywords: matrix, MKL, sparse, parallel 744 745 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 746 @*/ 747 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 748 { 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatCreate(comm,A);CHKERRQ(ierr); 753 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 754 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 755 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 760 { 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 765 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 766 PetscFunctionReturn(0); 767 } 768