1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->scale = MatScale_SeqAIJ; 51 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 52 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 53 B->ops->axpy = MatAXPY_SeqAIJ; 54 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 60 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61 * simply involves destroying the MKL sparse matrix handle and then freeing 62 * the spptr pointer. */ 63 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 64 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 65 66 if (aijmkl->sparse_optimized) { 67 sparse_status_t stat; 68 stat = mkl_sparse_destroy(aijmkl->csrA); 69 if (stat != SPARSE_STATUS_SUCCESS) { 70 PetscFunctionReturn(PETSC_ERR_LIB); 71 } 72 } 73 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 74 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 75 76 /* Change the type of B to MATSEQAIJ. */ 77 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 78 79 *newmat = B; 80 PetscFunctionReturn(0); 81 } 82 83 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 84 { 85 PetscErrorCode ierr; 86 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 87 88 PetscFunctionBegin; 89 90 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 91 * spptr pointer. */ 92 if (aijmkl) { 93 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 94 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 95 if (aijmkl->sparse_optimized) { 96 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 97 stat = mkl_sparse_destroy(aijmkl->csrA); 98 if (stat != SPARSE_STATUS_SUCCESS) { 99 PetscFunctionReturn(PETSC_ERR_LIB); 100 } 101 } 102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 103 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 104 } 105 106 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 107 * to destroy everything that remains. */ 108 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 109 /* Note that I don't call MatSetType(). I believe this is because that 110 * is only to be called when *building* a matrix. I could be wrong, but 111 * that is how things work for the SuperLU matrix class. */ 112 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 117 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 118 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 119 * handle, creates a new one, and then calls mkl_sparse_optimize(). 120 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 121 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 122 * an unoptimized matrix handle here. */ 123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 124 { 125 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 126 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 127 * does nothing. We make it callable anyway in this case because it cuts 128 * down on littering the code with #ifdefs. */ 129 PetscFunctionReturn(0); 130 #else 131 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 132 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 133 PetscInt m,n; 134 MatScalar *aa; 135 PetscInt *aj,*ai; 136 sparse_status_t stat; 137 138 PetscFunctionBegin; 139 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 140 141 if (aijmkl->sparse_optimized) { 142 /* Matrix has been previously assembled and optimized. Must destroy old 143 * matrix handle before running the optimization step again. */ 144 stat = mkl_sparse_destroy(aijmkl->csrA); 145 if (stat != SPARSE_STATUS_SUCCESS) { 146 PetscFunctionReturn(PETSC_ERR_LIB); 147 } 148 } 149 aijmkl->sparse_optimized = PETSC_FALSE; 150 151 /* Now perform the SpMV2 setup and matrix optimization. */ 152 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 153 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 154 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 155 m = A->rmap->n; 156 n = A->cmap->n; 157 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 158 aa = a->a; /* Nonzero elements stored row-by-row. */ 159 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 160 if ((a->nz!=0) & !(A->structure_only)) { 161 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 162 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 163 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 164 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 165 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 166 stat = mkl_sparse_optimize(aijmkl->csrA); 167 if (stat != SPARSE_STATUS_SUCCESS) { 168 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 169 PetscFunctionReturn(PETSC_ERR_LIB); 170 } 171 aijmkl->sparse_optimized = PETSC_TRUE; 172 } 173 174 PetscFunctionReturn(0); 175 #endif 176 } 177 178 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 179 { 180 PetscErrorCode ierr; 181 Mat_SeqAIJMKL *aijmkl; 182 Mat_SeqAIJMKL *aijmkl_dest; 183 184 PetscFunctionBegin; 185 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 186 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 187 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 188 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 189 aijmkl_dest->sparse_optimized = PETSC_FALSE; 190 if (aijmkl->eager_inspection) { 191 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 197 { 198 PetscErrorCode ierr; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 Mat_SeqAIJMKL *aijmkl; 201 202 PetscFunctionBegin; 203 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 204 205 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 206 * extra information and some different methods, call the AssemblyEnd 207 * routine for a MATSEQAIJ. 208 * I'm not sure if this is the best way to do this, but it avoids 209 * a lot of code duplication. */ 210 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 211 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 212 213 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 214 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 215 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 216 if (aijmkl->eager_inspection) { 217 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 218 } 219 220 PetscFunctionReturn(0); 221 } 222 223 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 224 { 225 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 226 const PetscScalar *x; 227 PetscScalar *y; 228 const MatScalar *aa; 229 PetscErrorCode ierr; 230 PetscInt m=A->rmap->n; 231 PetscInt n=A->cmap->n; 232 PetscScalar alpha = 1.0; 233 PetscScalar beta = 0.0; 234 const PetscInt *aj,*ai; 235 char matdescra[6]; 236 237 238 /* Variables not in MatMult_SeqAIJ. */ 239 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 240 241 PetscFunctionBegin; 242 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 243 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 244 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 246 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 247 aa = a->a; /* Nonzero elements stored row-by-row. */ 248 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 249 250 /* Call MKL sparse BLAS routine to do the MatMult. */ 251 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 252 253 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 254 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 255 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 260 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 263 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 264 const PetscScalar *x; 265 PetscScalar *y; 266 PetscErrorCode ierr; 267 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 268 269 PetscFunctionBegin; 270 271 /* If there are no nonzero entries, zero yy and return immediately. */ 272 if(!a->nz) { 273 PetscInt i; 274 PetscInt m=A->rmap->n; 275 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 276 for (i=0; i<m; i++) { 277 y[i] = 0.0; 278 } 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 285 286 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 287 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 288 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 289 if (!aijmkl->sparse_optimized) { 290 MatSeqAIJMKL_create_mkl_handle(A); 291 } 292 293 /* Call MKL SpMV2 executor routine to do the MatMult. */ 294 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 295 296 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 297 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 298 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 299 if (stat != SPARSE_STATUS_SUCCESS) { 300 PetscFunctionReturn(PETSC_ERR_LIB); 301 } 302 PetscFunctionReturn(0); 303 } 304 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 305 306 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 307 { 308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 309 const PetscScalar *x; 310 PetscScalar *y; 311 const MatScalar *aa; 312 PetscErrorCode ierr; 313 PetscInt m=A->rmap->n; 314 PetscInt n=A->cmap->n; 315 PetscScalar alpha = 1.0; 316 PetscScalar beta = 0.0; 317 const PetscInt *aj,*ai; 318 char matdescra[6]; 319 320 /* Variables not in MatMultTranspose_SeqAIJ. */ 321 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 322 323 PetscFunctionBegin; 324 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 325 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 326 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 329 aa = a->a; /* Nonzero elements stored row-by-row. */ 330 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 331 332 /* Call MKL sparse BLAS routine to do the MatMult. */ 333 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 334 335 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 342 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 345 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 346 const PetscScalar *x; 347 PetscScalar *y; 348 PetscErrorCode ierr; 349 sparse_status_t stat; 350 351 PetscFunctionBegin; 352 353 /* If there are no nonzero entries, zero yy and return immediately. */ 354 if(!a->nz) { 355 PetscInt i; 356 PetscInt n=A->cmap->n; 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 for (i=0; i<n; i++) { 359 y[i] = 0.0; 360 } 361 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 367 368 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 369 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 370 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 371 if (!aijmkl->sparse_optimized) { 372 MatSeqAIJMKL_create_mkl_handle(A); 373 } 374 375 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 376 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 377 378 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 379 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 380 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 381 if (stat != SPARSE_STATUS_SUCCESS) { 382 PetscFunctionReturn(PETSC_ERR_LIB); 383 } 384 PetscFunctionReturn(0); 385 } 386 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 387 388 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 389 { 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 391 const PetscScalar *x; 392 PetscScalar *y,*z; 393 const MatScalar *aa; 394 PetscErrorCode ierr; 395 PetscInt m=A->rmap->n; 396 PetscInt n=A->cmap->n; 397 const PetscInt *aj,*ai; 398 PetscInt i; 399 400 /* Variables not in MatMultAdd_SeqAIJ. */ 401 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 402 PetscScalar alpha = 1.0; 403 PetscScalar beta; 404 char matdescra[6]; 405 406 PetscFunctionBegin; 407 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 408 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 409 410 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 412 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 413 aa = a->a; /* Nonzero elements stored row-by-row. */ 414 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 415 416 /* Call MKL sparse BLAS routine to do the MatMult. */ 417 if (zz == yy) { 418 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 419 beta = 1.0; 420 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 421 } else { 422 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 423 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 424 beta = 0.0; 425 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 426 for (i=0; i<m; i++) { 427 z[i] += y[i]; 428 } 429 } 430 431 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 433 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 438 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 439 { 440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 441 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 442 const PetscScalar *x; 443 PetscScalar *y,*z; 444 PetscErrorCode ierr; 445 PetscInt m=A->rmap->n; 446 PetscInt i; 447 448 /* Variables not in MatMultAdd_SeqAIJ. */ 449 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 450 451 PetscFunctionBegin; 452 453 /* If there are no nonzero entries, set zz = yy and return immediately. */ 454 if(!a->nz) { 455 PetscInt i; 456 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 457 for (i=0; i<m; i++) { 458 z[i] = y[i]; 459 } 460 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 466 467 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 468 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 469 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 470 if (!aijmkl->sparse_optimized) { 471 MatSeqAIJMKL_create_mkl_handle(A); 472 } 473 474 /* Call MKL sparse BLAS routine to do the MatMult. */ 475 if (zz == yy) { 476 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 477 * with alpha and beta both set to 1.0. */ 478 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 479 } else { 480 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 481 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 482 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 483 for (i=0; i<m; i++) { 484 z[i] += y[i]; 485 } 486 } 487 488 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 491 if (stat != SPARSE_STATUS_SUCCESS) { 492 PetscFunctionReturn(PETSC_ERR_LIB); 493 } 494 PetscFunctionReturn(0); 495 } 496 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 497 498 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 499 { 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 501 const PetscScalar *x; 502 PetscScalar *y,*z; 503 const MatScalar *aa; 504 PetscErrorCode ierr; 505 PetscInt m=A->rmap->n; 506 PetscInt n=A->cmap->n; 507 const PetscInt *aj,*ai; 508 PetscInt i; 509 510 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 511 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 512 PetscScalar alpha = 1.0; 513 PetscScalar beta; 514 char matdescra[6]; 515 516 PetscFunctionBegin; 517 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 518 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 519 520 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 521 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 522 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 523 aa = a->a; /* Nonzero elements stored row-by-row. */ 524 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 525 526 /* Call MKL sparse BLAS routine to do the MatMult. */ 527 if (zz == yy) { 528 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 529 beta = 1.0; 530 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 531 } else { 532 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 533 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 534 beta = 0.0; 535 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 536 for (i=0; i<n; i++) { 537 z[i] += y[i]; 538 } 539 } 540 541 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 542 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 548 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 551 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 552 const PetscScalar *x; 553 PetscScalar *y,*z; 554 PetscErrorCode ierr; 555 PetscInt n=A->cmap->n; 556 PetscInt i; 557 558 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 559 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 560 561 PetscFunctionBegin; 562 563 /* If there are no nonzero entries, set zz = yy and return immediately. */ 564 if(!a->nz) { 565 PetscInt i; 566 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567 for (i=0; i<n; i++) { 568 z[i] = y[i]; 569 } 570 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 575 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 576 577 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 578 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 579 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 580 if (!aijmkl->sparse_optimized) { 581 MatSeqAIJMKL_create_mkl_handle(A); 582 } 583 584 /* Call MKL sparse BLAS routine to do the MatMult. */ 585 if (zz == yy) { 586 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 587 * with alpha and beta both set to 1.0. */ 588 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 589 } else { 590 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 591 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 592 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 593 for (i=0; i<n; i++) { 594 z[i] += y[i]; 595 } 596 } 597 598 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 599 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 600 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 601 if (stat != SPARSE_STATUS_SUCCESS) { 602 PetscFunctionReturn(PETSC_ERR_LIB); 603 } 604 PetscFunctionReturn(0); 605 } 606 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 607 608 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 609 { 610 PetscErrorCode ierr; 611 612 PetscFunctionBegin; 613 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 614 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 619 { 620 PetscErrorCode ierr; 621 622 PetscFunctionBegin; 623 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 624 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 629 { 630 PetscErrorCode ierr; 631 632 PetscFunctionBegin; 633 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 634 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 635 PetscFunctionReturn(0); 636 } 637 638 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 639 { 640 PetscErrorCode ierr; 641 642 PetscFunctionBegin; 643 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 644 if (str == SAME_NONZERO_PATTERN) { 645 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 646 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 652 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 653 * routine, but can also be used to convert an assembled SeqAIJ matrix 654 * into a SeqAIJMKL one. */ 655 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 656 { 657 PetscErrorCode ierr; 658 Mat B = *newmat; 659 Mat_SeqAIJMKL *aijmkl; 660 PetscBool set; 661 PetscBool sametype; 662 663 PetscFunctionBegin; 664 if (reuse == MAT_INITIAL_MATRIX) { 665 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 666 } 667 668 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 669 if (sametype) PetscFunctionReturn(0); 670 671 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 672 B->spptr = (void*) aijmkl; 673 674 /* Set function pointers for methods that we inherit from AIJ but override. 675 * We also parse some command line options below, since those determine some of the methods we point to. */ 676 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 677 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 678 B->ops->destroy = MatDestroy_SeqAIJMKL; 679 680 aijmkl->sparse_optimized = PETSC_FALSE; 681 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 682 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 683 #else 684 aijmkl->no_SpMV2 = PETSC_TRUE; 685 #endif 686 aijmkl->eager_inspection = PETSC_FALSE; 687 688 /* Parse command line options. */ 689 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 690 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 691 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 692 ierr = PetscOptionsEnd();CHKERRQ(ierr); 693 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 694 if(!aijmkl->no_SpMV2) { 695 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 696 aijmkl->no_SpMV2 = PETSC_TRUE; 697 } 698 #endif 699 700 if(!aijmkl->no_SpMV2) { 701 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 702 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 703 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 704 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 705 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 706 #endif 707 } else { 708 B->ops->mult = MatMult_SeqAIJMKL; 709 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 710 B->ops->multadd = MatMultAdd_SeqAIJMKL; 711 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 712 } 713 714 B->ops->scale = MatScale_SeqAIJMKL; 715 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 716 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 717 B->ops->axpy = MatAXPY_SeqAIJMKL; 718 719 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 720 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 721 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 722 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 723 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 724 725 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 726 *newmat = B; 727 PetscFunctionReturn(0); 728 } 729 730 /*@C 731 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 732 This type inherits from AIJ and is largely identical, but uses sparse BLAS 733 routines from Intel MKL whenever possible. 734 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 735 operations are currently supported. 736 If the installed version of MKL supports the "SpMV2" sparse 737 inspector-executor routines, then those are used by default. 738 739 Collective on MPI_Comm 740 741 Input Parameters: 742 + comm - MPI communicator, set to PETSC_COMM_SELF 743 . m - number of rows 744 . n - number of columns 745 . nz - number of nonzeros per row (same for all rows) 746 - nnz - array containing the number of nonzeros in the various rows 747 (possibly different for each row) or NULL 748 749 Output Parameter: 750 . A - the matrix 751 752 Options Database Keys: 753 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 754 755 Notes: 756 If nnz is given then nz is ignored 757 758 Level: intermediate 759 760 .keywords: matrix, MKL, sparse, parallel 761 762 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 763 @*/ 764 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 765 { 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 ierr = MatCreate(comm,A);CHKERRQ(ierr); 770 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 771 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 772 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 777 { 778 PetscErrorCode ierr; 779 780 PetscFunctionBegin; 781 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 782 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 783 PetscFunctionReturn(0); 784 } 785