1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 B->ops->scale = MatScale_SeqAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 50 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 51 B->ops->axpy = MatAXPY_SeqAIJ; 52 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 58 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 59 * simply involves destroying the MKL sparse matrix handle and then freeing 60 * the spptr pointer. */ 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if (aijmkl->sparse_optimized) { 63 sparse_status_t stat; 64 stat = mkl_sparse_destroy(aijmkl->csrA); 65 if (stat != SPARSE_STATUS_SUCCESS) { 66 PetscFunctionReturn(PETSC_ERR_LIB); 67 } 68 } 69 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 70 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 71 72 /* Change the type of B to MATSEQAIJ. */ 73 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 74 75 *newmat = B; 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 113 { 114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115 Mat_SeqAIJMKL *aijmkl; 116 PetscInt m,n; 117 MatScalar *aa; 118 PetscInt *aj,*ai; 119 120 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 121 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 122 * does nothing. We make it callable anyway in this case because it cuts 123 * down on littering the code with #ifdefs. */ 124 PetscFunctionReturn(0); 125 #else 126 127 sparse_status_t stat; 128 129 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 130 131 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 132 133 if (aijmkl->sparse_optimized) { 134 /* Matrix has been previously assembled and optimized. Must destroy old 135 * matrix handle before running the optimization step again. */ 136 stat = mkl_sparse_destroy(aijmkl->csrA); 137 if (stat != SPARSE_STATUS_SUCCESS) { 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 } 141 aijmkl->sparse_optimized = PETSC_FALSE; 142 143 /* Now perform the SpMV2 setup and matrix optimization. */ 144 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 147 m = A->rmap->n; 148 n = A->cmap->n; 149 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150 aa = a->a; /* Nonzero elements stored row-by-row. */ 151 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 152 if (a->nz) { 153 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 154 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 155 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158 stat = mkl_sparse_optimize(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) { 160 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161 PetscFunctionReturn(PETSC_ERR_LIB); 162 } 163 aijmkl->sparse_optimized = PETSC_TRUE; 164 } 165 166 PetscFunctionReturn(0); 167 #endif 168 } 169 170 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqAIJMKL *aijmkl; 174 Mat_SeqAIJMKL *aijmkl_dest; 175 176 PetscFunctionBegin; 177 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 178 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 179 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 180 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 181 aijmkl_dest->sparse_optimized = PETSC_FALSE; 182 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 187 { 188 PetscErrorCode ierr; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 191 PetscFunctionBegin; 192 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 193 194 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 195 * extra information and some different methods, call the AssemblyEnd 196 * routine for a MATSEQAIJ. 197 * I'm not sure if this is the best way to do this, but it avoids 198 * a lot of code duplication. 199 * I also note that currently MATSEQAIJMKL doesn't know anything about 200 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 201 * are many zero rows. If the SeqAIJ assembly end routine decides to use 202 * this, this may break things. (Don't know... haven't looked at it. 203 * Do I need to disable this somehow?) */ 204 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 205 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 206 207 /* Now create the MKL sparse handle (if needed; the function checks). */ 208 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 209 210 PetscFunctionReturn(0); 211 } 212 213 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 214 { 215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 216 const PetscScalar *x; 217 PetscScalar *y; 218 const MatScalar *aa; 219 PetscErrorCode ierr; 220 PetscInt m=A->rmap->n; 221 PetscInt n=A->cmap->n; 222 PetscScalar alpha = 1.0; 223 PetscScalar beta = 0.0; 224 const PetscInt *aj,*ai; 225 char matdescra[6]; 226 227 228 /* Variables not in MatMult_SeqAIJ. */ 229 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 230 231 PetscFunctionBegin; 232 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 233 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 236 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 237 aa = a->a; /* Nonzero elements stored row-by-row. */ 238 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 239 240 /* Call MKL sparse BLAS routine to do the MatMult. */ 241 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 242 243 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 250 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 251 { 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 253 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 254 const PetscScalar *x; 255 PetscScalar *y; 256 PetscErrorCode ierr; 257 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 258 259 PetscFunctionBegin; 260 261 /* If there are no nonzero entries, this is a no-op: return immediately. */ 262 if(!a->nz) PetscFunctionReturn(0); 263 264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 266 267 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 268 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 269 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 270 if (!aijmkl->sparse_optimized) { 271 MatSeqAIJMKL_create_mkl_handle(A); 272 } 273 274 /* Call MKL SpMV2 executor routine to do the MatMult. */ 275 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 276 277 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 278 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 if (stat != SPARSE_STATUS_SUCCESS) { 281 PetscFunctionReturn(PETSC_ERR_LIB); 282 } 283 PetscFunctionReturn(0); 284 } 285 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 286 287 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 288 { 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 290 const PetscScalar *x; 291 PetscScalar *y; 292 const MatScalar *aa; 293 PetscErrorCode ierr; 294 PetscInt m=A->rmap->n; 295 PetscInt n=A->cmap->n; 296 PetscScalar alpha = 1.0; 297 PetscScalar beta = 0.0; 298 const PetscInt *aj,*ai; 299 char matdescra[6]; 300 301 /* Variables not in MatMultTranspose_SeqAIJ. */ 302 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 303 304 PetscFunctionBegin; 305 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 306 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 307 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 308 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 309 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 310 aa = a->a; /* Nonzero elements stored row-by-row. */ 311 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 312 313 /* Call MKL sparse BLAS routine to do the MatMult. */ 314 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 315 316 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 323 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 324 { 325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 326 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 327 const PetscScalar *x; 328 PetscScalar *y; 329 PetscErrorCode ierr; 330 sparse_status_t stat; 331 332 PetscFunctionBegin; 333 334 /* If there are no nonzero entries, this is a no-op: return immediately. */ 335 if(!a->nz) PetscFunctionReturn(0); 336 337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 339 340 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 341 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 342 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 343 if (!aijmkl->sparse_optimized) { 344 MatSeqAIJMKL_create_mkl_handle(A); 345 } 346 347 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 348 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 349 350 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353 if (stat != SPARSE_STATUS_SUCCESS) { 354 PetscFunctionReturn(PETSC_ERR_LIB); 355 } 356 PetscFunctionReturn(0); 357 } 358 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 359 360 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 361 { 362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 363 const PetscScalar *x; 364 PetscScalar *y,*z; 365 const MatScalar *aa; 366 PetscErrorCode ierr; 367 PetscInt m=A->rmap->n; 368 PetscInt n=A->cmap->n; 369 const PetscInt *aj,*ai; 370 PetscInt i; 371 372 /* Variables not in MatMultAdd_SeqAIJ. */ 373 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 374 PetscScalar alpha = 1.0; 375 PetscScalar beta; 376 char matdescra[6]; 377 378 PetscFunctionBegin; 379 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 380 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 381 382 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 384 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 385 aa = a->a; /* Nonzero elements stored row-by-row. */ 386 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 387 388 /* Call MKL sparse BLAS routine to do the MatMult. */ 389 if (zz == yy) { 390 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 391 beta = 1.0; 392 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 393 } else { 394 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 395 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 396 beta = 0.0; 397 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 398 for (i=0; i<m; i++) { 399 z[i] += y[i]; 400 } 401 } 402 403 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 410 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 411 { 412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 413 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 414 const PetscScalar *x; 415 PetscScalar *y,*z; 416 PetscErrorCode ierr; 417 PetscInt m=A->rmap->n; 418 PetscInt i; 419 420 /* Variables not in MatMultAdd_SeqAIJ. */ 421 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 422 423 PetscFunctionBegin; 424 425 /* If there are no nonzero entries, this is a no-op: return immediately. */ 426 if(!a->nz) PetscFunctionReturn(0); 427 428 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 429 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 430 431 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 432 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 433 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 434 if (!aijmkl->sparse_optimized) { 435 MatSeqAIJMKL_create_mkl_handle(A); 436 } 437 438 /* Call MKL sparse BLAS routine to do the MatMult. */ 439 if (zz == yy) { 440 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 441 * with alpha and beta both set to 1.0. */ 442 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 443 } else { 444 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 445 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 446 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 447 for (i=0; i<m; i++) { 448 z[i] += y[i]; 449 } 450 } 451 452 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 454 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 455 if (stat != SPARSE_STATUS_SUCCESS) { 456 PetscFunctionReturn(PETSC_ERR_LIB); 457 } 458 PetscFunctionReturn(0); 459 } 460 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 461 462 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 463 { 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 const PetscScalar *x; 466 PetscScalar *y,*z; 467 const MatScalar *aa; 468 PetscErrorCode ierr; 469 PetscInt m=A->rmap->n; 470 PetscInt n=A->cmap->n; 471 const PetscInt *aj,*ai; 472 PetscInt i; 473 474 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 475 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 476 PetscScalar alpha = 1.0; 477 PetscScalar beta; 478 char matdescra[6]; 479 480 PetscFunctionBegin; 481 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 482 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 483 484 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 485 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 486 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 487 aa = a->a; /* Nonzero elements stored row-by-row. */ 488 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 489 490 /* Call MKL sparse BLAS routine to do the MatMult. */ 491 if (zz == yy) { 492 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 493 beta = 1.0; 494 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 495 } else { 496 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 497 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 498 beta = 0.0; 499 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 500 for (i=0; i<n; i++) { 501 z[i] += y[i]; 502 } 503 } 504 505 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 512 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 513 { 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 515 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 516 const PetscScalar *x; 517 PetscScalar *y,*z; 518 PetscErrorCode ierr; 519 PetscInt n=A->cmap->n; 520 PetscInt i; 521 522 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 523 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 524 525 PetscFunctionBegin; 526 527 /* If there are no nonzero entries, this is a no-op: return immediately. */ 528 if(!a->nz) PetscFunctionReturn(0); 529 530 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 531 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 532 533 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 534 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 535 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 536 if (!aijmkl->sparse_optimized) { 537 MatSeqAIJMKL_create_mkl_handle(A); 538 } 539 540 /* Call MKL sparse BLAS routine to do the MatMult. */ 541 if (zz == yy) { 542 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 543 * with alpha and beta both set to 1.0. */ 544 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 545 } else { 546 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 547 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 548 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 549 for (i=0; i<n; i++) { 550 z[i] += y[i]; 551 } 552 } 553 554 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 555 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 557 if (stat != SPARSE_STATUS_SUCCESS) { 558 PetscFunctionReturn(PETSC_ERR_LIB); 559 } 560 PetscFunctionReturn(0); 561 } 562 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 563 564 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 565 { 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 570 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 575 { 576 PetscErrorCode ierr; 577 578 PetscFunctionBegin; 579 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 580 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 585 { 586 PetscErrorCode ierr; 587 588 PetscFunctionBegin; 589 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 590 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 595 { 596 PetscErrorCode ierr; 597 598 PetscFunctionBegin; 599 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 600 if (str == SAME_NONZERO_PATTERN) { 601 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 602 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 608 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 609 * routine, but can also be used to convert an assembled SeqAIJ matrix 610 * into a SeqAIJMKL one. */ 611 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 612 { 613 PetscErrorCode ierr; 614 Mat B = *newmat; 615 Mat_SeqAIJMKL *aijmkl; 616 PetscBool set; 617 PetscBool sametype; 618 619 PetscFunctionBegin; 620 if (reuse == MAT_INITIAL_MATRIX) { 621 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 622 } 623 624 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 625 if (sametype) PetscFunctionReturn(0); 626 627 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 628 B->spptr = (void*) aijmkl; 629 630 /* Set function pointers for methods that we inherit from AIJ but override. 631 * We also parse some command line options below, since those determine some of the methods we point to. */ 632 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 633 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 634 B->ops->destroy = MatDestroy_SeqAIJMKL; 635 636 aijmkl->sparse_optimized = PETSC_FALSE; 637 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 638 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 639 #elif 640 aijmkl->no_SpMV2 = PETSC_TRUE; 641 #endif 642 643 /* Parse command line options. */ 644 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 645 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 646 ierr = PetscOptionsEnd();CHKERRQ(ierr); 647 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 648 if(!aijmkl->no_SpMV2) { 649 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 650 aijmkl->no_SpMV2 = PETSC_TRUE; 651 } 652 #endif 653 654 if(!aijmkl->no_SpMV2) { 655 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 656 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 657 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 658 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 659 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 660 #endif 661 } else { 662 B->ops->mult = MatMult_SeqAIJMKL; 663 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 664 B->ops->multadd = MatMultAdd_SeqAIJMKL; 665 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 666 } 667 668 B->ops->scale = MatScale_SeqAIJMKL; 669 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 670 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 671 B->ops->axpy = MatAXPY_SeqAIJMKL; 672 673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 678 679 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 680 *newmat = B; 681 PetscFunctionReturn(0); 682 } 683 684 /*@C 685 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 686 This type inherits from AIJ and is largely identical, but uses sparse BLAS 687 routines from Intel MKL whenever possible. 688 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 689 operations are currently supported. 690 If the installed version of MKL supports the "SpMV2" sparse 691 inspector-executor routines, then those are used by default. 692 693 Collective on MPI_Comm 694 695 Input Parameters: 696 + comm - MPI communicator, set to PETSC_COMM_SELF 697 . m - number of rows 698 . n - number of columns 699 . nz - number of nonzeros per row (same for all rows) 700 - nnz - array containing the number of nonzeros in the various rows 701 (possibly different for each row) or NULL 702 703 Output Parameter: 704 . A - the matrix 705 706 Options Database Keys: 707 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 708 709 Notes: 710 If nnz is given then nz is ignored 711 712 Level: intermediate 713 714 .keywords: matrix, MKL, sparse, parallel 715 716 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 717 @*/ 718 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 719 { 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatCreate(comm,A);CHKERRQ(ierr); 724 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 725 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 726 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 731 { 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 736 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739