1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 B->ops->scale = MatScale_SeqAIJ; 49 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 50 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 51 B->ops->axpy = MatAXPY_SeqAIJ; 52 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 58 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 59 * simply involves destroying the MKL sparse matrix handle and then freeing 60 * the spptr pointer. */ 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if (aijmkl->sparse_optimized) { 63 sparse_status_t stat; 64 stat = mkl_sparse_destroy(aijmkl->csrA); 65 if (stat != SPARSE_STATUS_SUCCESS) { 66 PetscFunctionReturn(PETSC_ERR_LIB); 67 } 68 } 69 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 70 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 71 72 /* Change the type of B to MATSEQAIJ. */ 73 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 74 75 *newmat = B; 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 113 { 114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115 Mat_SeqAIJMKL *aijmkl; 116 PetscInt m,n; 117 MatScalar *aa; 118 PetscInt *aj,*ai; 119 120 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 121 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 122 * does nothing. We make it callable anyway in this case because it cuts 123 * down on littering the code with #ifdefs. */ 124 PetscFunctionReturn(0); 125 #else 126 127 sparse_status_t stat; 128 129 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 130 131 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 132 133 if (aijmkl->sparse_optimized) { 134 /* Matrix has been previously assembled and optimized. Must destroy old 135 * matrix handle before running the optimization step again. */ 136 stat = mkl_sparse_destroy(aijmkl->csrA); 137 if (stat != SPARSE_STATUS_SUCCESS) { 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 } 141 aijmkl->sparse_optimized = PETSC_FALSE; 142 143 /* Now perform the SpMV2 setup and matrix optimization. */ 144 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 145 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 146 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 147 m = A->rmap->n; 148 n = A->cmap->n; 149 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 150 aa = a->a; /* Nonzero elements stored row-by-row. */ 151 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 152 if (a->nz) { 153 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 154 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 155 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 156 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158 stat = mkl_sparse_optimize(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) { 160 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 161 PetscFunctionReturn(PETSC_ERR_LIB); 162 } 163 aijmkl->sparse_optimized = PETSC_TRUE; 164 } 165 166 PetscFunctionReturn(0); 167 #endif 168 } 169 170 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqAIJMKL *aijmkl; 174 Mat_SeqAIJMKL *aijmkl_dest; 175 176 PetscFunctionBegin; 177 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 178 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 179 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 180 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 181 aijmkl_dest->sparse_optimized = PETSC_FALSE; 182 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 187 { 188 PetscErrorCode ierr; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 191 PetscFunctionBegin; 192 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 193 194 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 195 * extra information and some different methods, call the AssemblyEnd 196 * routine for a MATSEQAIJ. 197 * I'm not sure if this is the best way to do this, but it avoids 198 * a lot of code duplication. 199 * I also note that currently MATSEQAIJMKL doesn't know anything about 200 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 201 * are many zero rows. If the SeqAIJ assembly end routine decides to use 202 * this, this may break things. (Don't know... haven't looked at it. 203 * Do I need to disable this somehow?) */ 204 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 205 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 206 207 /* Now create the MKL sparse handle (if needed; the function checks). */ 208 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 209 210 PetscFunctionReturn(0); 211 } 212 213 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 214 { 215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 216 const PetscScalar *x; 217 PetscScalar *y; 218 const MatScalar *aa; 219 PetscErrorCode ierr; 220 PetscInt m=A->rmap->n; 221 PetscInt n=A->cmap->n; 222 PetscScalar alpha = 1.0; 223 PetscScalar beta = 0.0; 224 const PetscInt *aj,*ai; 225 char matdescra[6]; 226 227 228 /* Variables not in MatMult_SeqAIJ. */ 229 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 230 231 PetscFunctionBegin; 232 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 233 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 236 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 237 aa = a->a; /* Nonzero elements stored row-by-row. */ 238 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 239 240 /* Call MKL sparse BLAS routine to do the MatMult. */ 241 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 242 243 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 250 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 251 { 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 253 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 254 const PetscScalar *x; 255 PetscScalar *y; 256 PetscErrorCode ierr; 257 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 258 259 PetscFunctionBegin; 260 261 /* If there are no nonzero entries, this is a no-op: return immediately. */ 262 if(!a->nz) PetscFunctionReturn(0); 263 264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 266 267 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 268 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 269 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 270 if (!aijmkl->sparse_optimized) { 271 MatSeqAIJMKL_create_mkl_handle(A); 272 } 273 274 /* Call MKL SpMV2 executor routine to do the MatMult. */ 275 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 276 277 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 278 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 if (stat != SPARSE_STATUS_SUCCESS) { 281 PetscFunctionReturn(PETSC_ERR_LIB); 282 } 283 PetscFunctionReturn(0); 284 } 285 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 286 287 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 288 { 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 290 const PetscScalar *x; 291 PetscScalar *y; 292 const MatScalar *aa; 293 PetscErrorCode ierr; 294 PetscInt m=A->rmap->n; 295 PetscInt n=A->cmap->n; 296 PetscScalar alpha = 1.0; 297 PetscScalar beta = 0.0; 298 const PetscInt *aj,*ai; 299 char matdescra[6]; 300 301 /* Variables not in MatMultTranspose_SeqAIJ. */ 302 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 303 304 PetscFunctionBegin; 305 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 306 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 307 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 308 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 309 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 310 aa = a->a; /* Nonzero elements stored row-by-row. */ 311 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 312 313 /* Call MKL sparse BLAS routine to do the MatMult. */ 314 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 315 316 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 323 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 324 { 325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 326 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 327 const PetscScalar *x; 328 PetscScalar *y; 329 PetscErrorCode ierr; 330 sparse_status_t stat; 331 332 PetscFunctionBegin; 333 334 /* If there are no nonzero entries, this is a no-op: return immediately. */ 335 if(!a->nz) PetscFunctionReturn(0); 336 337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 339 340 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 341 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 342 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 343 if (!aijmkl->sparse_optimized) { 344 MatSeqAIJMKL_create_mkl_handle(A); 345 } 346 347 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 348 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 349 350 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353 if (stat != SPARSE_STATUS_SUCCESS) { 354 PetscFunctionReturn(PETSC_ERR_LIB); 355 } 356 PetscFunctionReturn(0); 357 } 358 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 359 360 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 361 { 362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 363 const PetscScalar *x; 364 PetscScalar *y,*z; 365 const MatScalar *aa; 366 PetscErrorCode ierr; 367 PetscInt m=A->rmap->n; 368 PetscInt n=A->cmap->n; 369 const PetscInt *aj,*ai; 370 PetscInt i; 371 372 /* Variables not in MatMultAdd_SeqAIJ. */ 373 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 374 PetscScalar alpha = 1.0; 375 PetscScalar beta; 376 char matdescra[6]; 377 378 PetscFunctionBegin; 379 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 380 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 381 382 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 384 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 385 aa = a->a; /* Nonzero elements stored row-by-row. */ 386 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 387 388 /* Call MKL sparse BLAS routine to do the MatMult. */ 389 if (zz == yy) { 390 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 391 beta = 1.0; 392 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 393 } else { 394 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 395 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 396 beta = 0.0; 397 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 398 for (i=0; i<m; i++) { 399 z[i] += y[i]; 400 } 401 } 402 403 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 410 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 411 { 412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 413 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 414 const PetscScalar *x; 415 PetscScalar *y,*z; 416 PetscErrorCode ierr; 417 PetscInt m=A->rmap->n; 418 PetscInt i; 419 420 /* Variables not in MatMultAdd_SeqAIJ. */ 421 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 422 423 PetscFunctionBegin; 424 425 /* If there are no nonzero entries, this is a no-op: return immediately. */ 426 if(!a->nz) PetscFunctionReturn(0); 427 428 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 429 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 430 431 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 432 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 433 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 434 if (!aijmkl->sparse_optimized) { 435 MatSeqAIJMKL_create_mkl_handle(A); 436 } 437 438 /* Call MKL sparse BLAS routine to do the MatMult. */ 439 if (zz == yy) { 440 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 441 * with alpha and beta both set to 1.0. */ 442 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 443 } else { 444 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 445 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 446 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 447 for (i=0; i<m; i++) { 448 z[i] += y[i]; 449 } 450 } 451 452 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 454 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 455 if (stat != SPARSE_STATUS_SUCCESS) { 456 PetscFunctionReturn(PETSC_ERR_LIB); 457 } 458 PetscFunctionReturn(0); 459 } 460 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 461 462 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 463 { 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 const PetscScalar *x; 466 PetscScalar *y,*z; 467 const MatScalar *aa; 468 PetscErrorCode ierr; 469 PetscInt m=A->rmap->n; 470 PetscInt n=A->cmap->n; 471 const PetscInt *aj,*ai; 472 PetscInt i; 473 474 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 475 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 476 PetscScalar alpha = 1.0; 477 PetscScalar beta; 478 char matdescra[6]; 479 480 PetscFunctionBegin; 481 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 482 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 483 484 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 485 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 486 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 487 aa = a->a; /* Nonzero elements stored row-by-row. */ 488 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 489 490 /* Call MKL sparse BLAS routine to do the MatMult. */ 491 if (zz == yy) { 492 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 493 beta = 1.0; 494 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 495 } else { 496 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 497 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 498 beta = 0.0; 499 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 500 for (i=0; i<n; i++) { 501 z[i] += y[i]; 502 } 503 } 504 505 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 512 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 513 { 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 515 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 516 const PetscScalar *x; 517 PetscScalar *y,*z; 518 PetscErrorCode ierr; 519 PetscInt n=A->cmap->n; 520 PetscInt i; 521 522 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 523 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 524 525 PetscFunctionBegin; 526 527 /* If there are no nonzero entries, this is a no-op: return immediately. */ 528 if(!a->nz) PetscFunctionReturn(0); 529 530 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 531 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 532 533 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 534 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 535 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 536 if (!aijmkl->sparse_optimized) { 537 MatSeqAIJMKL_create_mkl_handle(A); 538 } 539 540 /* Call MKL sparse BLAS routine to do the MatMult. */ 541 if (zz == yy) { 542 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 543 * with alpha and beta both set to 1.0. */ 544 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 545 } else { 546 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 547 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 548 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 549 for (i=0; i<n; i++) { 550 z[i] += y[i]; 551 } 552 } 553 554 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 555 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 557 if (stat != SPARSE_STATUS_SUCCESS) { 558 PetscFunctionReturn(PETSC_ERR_LIB); 559 } 560 PetscFunctionReturn(0); 561 } 562 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 563 564 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 565 { 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 570 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 575 { 576 PetscErrorCode ierr; 577 578 PetscFunctionBegin; 579 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 580 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 585 { 586 PetscErrorCode ierr; 587 588 PetscFunctionBegin; 589 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 590 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 595 { 596 PetscErrorCode ierr; 597 598 PetscFunctionBegin; 599 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 600 if (str == SAME_NONZERO_PATTERN) { 601 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 602 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 608 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 609 * routine, but can also be used to convert an assembled SeqAIJ matrix 610 * into a SeqAIJMKL one. */ 611 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 612 { 613 PetscErrorCode ierr; 614 Mat B = *newmat; 615 Mat_SeqAIJMKL *aijmkl; 616 PetscBool set; 617 PetscBool sametype; 618 619 PetscFunctionBegin; 620 if (reuse == MAT_INITIAL_MATRIX) { 621 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 622 } 623 624 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 625 if (sametype) PetscFunctionReturn(0); 626 627 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 628 B->spptr = (void*) aijmkl; 629 630 /* Set function pointers for methods that we inherit from AIJ but override. 631 * We also parse some command line options below, since those determine some of the methods we point to. */ 632 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 633 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 634 B->ops->destroy = MatDestroy_SeqAIJMKL; 635 636 aijmkl->sparse_optimized = PETSC_FALSE; 637 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 638 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 639 #elif 640 aijmkl->no_SpMV2 = PETSC_TRUE; 641 #endif 642 643 /* Parse command line options. */ 644 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 645 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 646 ierr = PetscOptionsEnd();CHKERRQ(ierr); 647 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 648 if(!aijmkl->no_SpMV2) { 649 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 650 aijmkl->no_SpMV2 = PETSC_TRUE; 651 } 652 #endif 653 654 if(!aijmkl->no_SpMV2) { 655 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 656 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 657 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 658 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 659 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 660 #endif 661 } else { 662 B->ops->mult = MatMult_SeqAIJMKL; 663 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 664 B->ops->multadd = MatMultAdd_SeqAIJMKL; 665 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 666 } 667 668 B->ops->scale = MatScale_SeqAIJMKL; 669 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 670 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 671 B->ops->axpy = MatAXPY_SeqAIJMKL; 672 673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 678 679 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 680 *newmat = B; 681 PetscFunctionReturn(0); 682 } 683 684 /*@C 685 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 686 This type inherits from AIJ and is largely identical, but uses sparse BLAS 687 routines from Intel MKL whenever possible. 688 Collective on MPI_Comm 689 690 Input Parameters: 691 + comm - MPI communicator, set to PETSC_COMM_SELF 692 . m - number of rows 693 . n - number of columns 694 . nz - number of nonzeros per row (same for all rows) 695 - nnz - array containing the number of nonzeros in the various rows 696 (possibly different for each row) or NULL 697 698 Output Parameter: 699 . A - the matrix 700 701 Notes: 702 If nnz is given then nz is ignored 703 704 Level: intermediate 705 706 .keywords: matrix, cray, sparse, parallel 707 708 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 709 @*/ 710 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 711 { 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 ierr = MatCreate(comm,A);CHKERRQ(ierr); 716 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 717 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 718 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 723 { 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 728 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731