1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 B->ops->scale = MatScale_SeqAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 50 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 51 B->ops->axpy = MatAXPY_SeqAIJ; 52 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 58 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 59 * simply involves destroying the MKL sparse matrix handle and then freeing 60 * the spptr pointer. */ 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if (aijmkl->sparse_optimized) { 63 sparse_status_t stat; 64 stat = mkl_sparse_destroy(aijmkl->csrA); 65 if (stat != SPARSE_STATUS_SUCCESS) { 66 PetscFunctionReturn(PETSC_ERR_LIB); 67 } 68 } 69 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 70 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 71 72 /* Change the type of B to MATSEQAIJ. */ 73 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 74 75 *newmat = B; 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 113 { 114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115 Mat_SeqAIJMKL *aijmkl; 116 PetscInt m,n; 117 MatScalar *aa; 118 PetscInt *aj,*ai; 119 120 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 121 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 122 * does nothing. We make it callable anyway in this case because it cuts 123 * down on littering the code with #ifdefs. */ 124 PetscFunctionReturn(0); 125 #else 126 127 sparse_status_t stat; 128 129 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 130 131 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 132 133 if (aijmkl->sparse_optimized) { 134 /* Matrix has been previously assembled and optimized. Must destroy old 135 * matrix handle before running the optimization step again. */ 136 stat = mkl_sparse_destroy(aijmkl->csrA); 137 if (stat != SPARSE_STATUS_SUCCESS) { 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 } 141 aijmkl->sparse_optimized = PETSC_FALSE; 142 143 /* Now perform the SpMV2 setup and matrix optimization. */ 144 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 147 m = A->rmap->n; 148 n = A->cmap->n; 149 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150 aa = a->a; /* Nonzero elements stored row-by-row. */ 151 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 152 if (a->nz) { 153 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 154 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 155 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158 stat = mkl_sparse_optimize(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) { 160 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161 PetscFunctionReturn(PETSC_ERR_LIB); 162 } 163 aijmkl->sparse_optimized = PETSC_TRUE; 164 } 165 166 PetscFunctionReturn(0); 167 #endif 168 } 169 170 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqAIJMKL *aijmkl; 174 Mat_SeqAIJMKL *aijmkl_dest; 175 176 PetscFunctionBegin; 177 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 178 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 179 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 180 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 181 aijmkl_dest->sparse_optimized = PETSC_FALSE; 182 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 187 { 188 PetscErrorCode ierr; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 191 PetscFunctionBegin; 192 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 193 194 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 195 * extra information and some different methods, call the AssemblyEnd 196 * routine for a MATSEQAIJ. 197 * I'm not sure if this is the best way to do this, but it avoids 198 * a lot of code duplication. */ 199 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 200 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 201 202 /* Now create the MKL sparse handle (if needed; the function checks). */ 203 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 204 205 PetscFunctionReturn(0); 206 } 207 208 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 209 { 210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 211 const PetscScalar *x; 212 PetscScalar *y; 213 const MatScalar *aa; 214 PetscErrorCode ierr; 215 PetscInt m=A->rmap->n; 216 PetscInt n=A->cmap->n; 217 PetscScalar alpha = 1.0; 218 PetscScalar beta = 0.0; 219 const PetscInt *aj,*ai; 220 char matdescra[6]; 221 222 223 /* Variables not in MatMult_SeqAIJ. */ 224 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 225 226 PetscFunctionBegin; 227 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 228 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 229 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 230 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 232 aa = a->a; /* Nonzero elements stored row-by-row. */ 233 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 234 235 /* Call MKL sparse BLAS routine to do the MatMult. */ 236 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 237 238 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 239 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 240 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 245 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 246 { 247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 249 const PetscScalar *x; 250 PetscScalar *y; 251 PetscErrorCode ierr; 252 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 253 254 PetscFunctionBegin; 255 256 /* If there are no nonzero entries, this is a no-op: return immediately. */ 257 if(!a->nz) PetscFunctionReturn(0); 258 259 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 260 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 261 262 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 263 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 264 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 265 if (!aijmkl->sparse_optimized) { 266 MatSeqAIJMKL_create_mkl_handle(A); 267 } 268 269 /* Call MKL SpMV2 executor routine to do the MatMult. */ 270 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 271 272 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 273 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 274 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 275 if (stat != SPARSE_STATUS_SUCCESS) { 276 PetscFunctionReturn(PETSC_ERR_LIB); 277 } 278 PetscFunctionReturn(0); 279 } 280 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 281 282 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 283 { 284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 285 const PetscScalar *x; 286 PetscScalar *y; 287 const MatScalar *aa; 288 PetscErrorCode ierr; 289 PetscInt m=A->rmap->n; 290 PetscInt n=A->cmap->n; 291 PetscScalar alpha = 1.0; 292 PetscScalar beta = 0.0; 293 const PetscInt *aj,*ai; 294 char matdescra[6]; 295 296 /* Variables not in MatMultTranspose_SeqAIJ. */ 297 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 298 299 PetscFunctionBegin; 300 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 301 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 302 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 303 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 304 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 305 aa = a->a; /* Nonzero elements stored row-by-row. */ 306 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 307 308 /* Call MKL sparse BLAS routine to do the MatMult. */ 309 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 310 311 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 312 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 313 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 318 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 319 { 320 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 321 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 322 const PetscScalar *x; 323 PetscScalar *y; 324 PetscErrorCode ierr; 325 sparse_status_t stat; 326 327 PetscFunctionBegin; 328 329 /* If there are no nonzero entries, this is a no-op: return immediately. */ 330 if(!a->nz) PetscFunctionReturn(0); 331 332 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 333 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 334 335 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 336 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 337 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 338 if (!aijmkl->sparse_optimized) { 339 MatSeqAIJMKL_create_mkl_handle(A); 340 } 341 342 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 343 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 344 345 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 347 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 348 if (stat != SPARSE_STATUS_SUCCESS) { 349 PetscFunctionReturn(PETSC_ERR_LIB); 350 } 351 PetscFunctionReturn(0); 352 } 353 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 354 355 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 356 { 357 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 358 const PetscScalar *x; 359 PetscScalar *y,*z; 360 const MatScalar *aa; 361 PetscErrorCode ierr; 362 PetscInt m=A->rmap->n; 363 PetscInt n=A->cmap->n; 364 const PetscInt *aj,*ai; 365 PetscInt i; 366 367 /* Variables not in MatMultAdd_SeqAIJ. */ 368 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 369 PetscScalar alpha = 1.0; 370 PetscScalar beta; 371 char matdescra[6]; 372 373 PetscFunctionBegin; 374 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 375 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 376 377 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 378 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 379 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 380 aa = a->a; /* Nonzero elements stored row-by-row. */ 381 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 382 383 /* Call MKL sparse BLAS routine to do the MatMult. */ 384 if (zz == yy) { 385 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 386 beta = 1.0; 387 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 388 } else { 389 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 390 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 391 beta = 0.0; 392 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 393 for (i=0; i<m; i++) { 394 z[i] += y[i]; 395 } 396 } 397 398 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 400 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 405 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 406 { 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 409 const PetscScalar *x; 410 PetscScalar *y,*z; 411 PetscErrorCode ierr; 412 PetscInt m=A->rmap->n; 413 PetscInt i; 414 415 /* Variables not in MatMultAdd_SeqAIJ. */ 416 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 417 418 PetscFunctionBegin; 419 420 /* If there are no nonzero entries, this is a no-op: return immediately. */ 421 if(!a->nz) PetscFunctionReturn(0); 422 423 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 424 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 425 426 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 427 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 428 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 429 if (!aijmkl->sparse_optimized) { 430 MatSeqAIJMKL_create_mkl_handle(A); 431 } 432 433 /* Call MKL sparse BLAS routine to do the MatMult. */ 434 if (zz == yy) { 435 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 436 * with alpha and beta both set to 1.0. */ 437 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 438 } else { 439 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 440 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 441 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 442 for (i=0; i<m; i++) { 443 z[i] += y[i]; 444 } 445 } 446 447 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 449 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 450 if (stat != SPARSE_STATUS_SUCCESS) { 451 PetscFunctionReturn(PETSC_ERR_LIB); 452 } 453 PetscFunctionReturn(0); 454 } 455 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 456 457 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 458 { 459 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 460 const PetscScalar *x; 461 PetscScalar *y,*z; 462 const MatScalar *aa; 463 PetscErrorCode ierr; 464 PetscInt m=A->rmap->n; 465 PetscInt n=A->cmap->n; 466 const PetscInt *aj,*ai; 467 PetscInt i; 468 469 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 470 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 471 PetscScalar alpha = 1.0; 472 PetscScalar beta; 473 char matdescra[6]; 474 475 PetscFunctionBegin; 476 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 477 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 478 479 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 481 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 482 aa = a->a; /* Nonzero elements stored row-by-row. */ 483 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 484 485 /* Call MKL sparse BLAS routine to do the MatMult. */ 486 if (zz == yy) { 487 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 488 beta = 1.0; 489 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 490 } else { 491 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 492 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 493 beta = 0.0; 494 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 495 for (i=0; i<n; i++) { 496 z[i] += y[i]; 497 } 498 } 499 500 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 501 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 502 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 507 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 508 { 509 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 510 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 511 const PetscScalar *x; 512 PetscScalar *y,*z; 513 PetscErrorCode ierr; 514 PetscInt n=A->cmap->n; 515 PetscInt i; 516 517 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 518 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 519 520 PetscFunctionBegin; 521 522 /* If there are no nonzero entries, this is a no-op: return immediately. */ 523 if(!a->nz) PetscFunctionReturn(0); 524 525 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 527 528 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 529 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 530 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 531 if (!aijmkl->sparse_optimized) { 532 MatSeqAIJMKL_create_mkl_handle(A); 533 } 534 535 /* Call MKL sparse BLAS routine to do the MatMult. */ 536 if (zz == yy) { 537 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 538 * with alpha and beta both set to 1.0. */ 539 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 540 } else { 541 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 542 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 543 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 544 for (i=0; i<n; i++) { 545 z[i] += y[i]; 546 } 547 } 548 549 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 550 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 552 if (stat != SPARSE_STATUS_SUCCESS) { 553 PetscFunctionReturn(PETSC_ERR_LIB); 554 } 555 PetscFunctionReturn(0); 556 } 557 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 558 559 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 560 { 561 PetscErrorCode ierr; 562 563 PetscFunctionBegin; 564 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 565 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 570 { 571 PetscErrorCode ierr; 572 573 PetscFunctionBegin; 574 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 575 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 580 { 581 PetscErrorCode ierr; 582 583 PetscFunctionBegin; 584 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 585 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 590 { 591 PetscErrorCode ierr; 592 593 PetscFunctionBegin; 594 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 595 if (str == SAME_NONZERO_PATTERN) { 596 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 597 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 598 } 599 PetscFunctionReturn(0); 600 } 601 602 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 603 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 604 * routine, but can also be used to convert an assembled SeqAIJ matrix 605 * into a SeqAIJMKL one. */ 606 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 607 { 608 PetscErrorCode ierr; 609 Mat B = *newmat; 610 Mat_SeqAIJMKL *aijmkl; 611 PetscBool set; 612 PetscBool sametype; 613 614 PetscFunctionBegin; 615 if (reuse == MAT_INITIAL_MATRIX) { 616 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 617 } 618 619 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 620 if (sametype) PetscFunctionReturn(0); 621 622 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 623 B->spptr = (void*) aijmkl; 624 625 /* Set function pointers for methods that we inherit from AIJ but override. 626 * We also parse some command line options below, since those determine some of the methods we point to. */ 627 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 628 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 629 B->ops->destroy = MatDestroy_SeqAIJMKL; 630 631 aijmkl->sparse_optimized = PETSC_FALSE; 632 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 633 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 634 #elif 635 aijmkl->no_SpMV2 = PETSC_TRUE; 636 #endif 637 638 /* Parse command line options. */ 639 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 640 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 642 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 643 if(!aijmkl->no_SpMV2) { 644 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 645 aijmkl->no_SpMV2 = PETSC_TRUE; 646 } 647 #endif 648 649 if(!aijmkl->no_SpMV2) { 650 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 651 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 652 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 653 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 654 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 655 #endif 656 } else { 657 B->ops->mult = MatMult_SeqAIJMKL; 658 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 659 B->ops->multadd = MatMultAdd_SeqAIJMKL; 660 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 661 } 662 663 B->ops->scale = MatScale_SeqAIJMKL; 664 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 665 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 666 B->ops->axpy = MatAXPY_SeqAIJMKL; 667 668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 673 674 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 675 *newmat = B; 676 PetscFunctionReturn(0); 677 } 678 679 /*@C 680 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 681 This type inherits from AIJ and is largely identical, but uses sparse BLAS 682 routines from Intel MKL whenever possible. 683 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 684 operations are currently supported. 685 If the installed version of MKL supports the "SpMV2" sparse 686 inspector-executor routines, then those are used by default. 687 688 Collective on MPI_Comm 689 690 Input Parameters: 691 + comm - MPI communicator, set to PETSC_COMM_SELF 692 . m - number of rows 693 . n - number of columns 694 . nz - number of nonzeros per row (same for all rows) 695 - nnz - array containing the number of nonzeros in the various rows 696 (possibly different for each row) or NULL 697 698 Output Parameter: 699 . A - the matrix 700 701 Options Database Keys: 702 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 703 704 Notes: 705 If nnz is given then nz is ignored 706 707 Level: intermediate 708 709 .keywords: matrix, MKL, sparse, parallel 710 711 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 712 @*/ 713 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 714 { 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatCreate(comm,A);CHKERRQ(ierr); 719 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 720 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 721 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 726 { 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 731 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734