1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->scale = MatScale_SeqAIJ; 51 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 52 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 53 B->ops->axpy = MatAXPY_SeqAIJ; 54 55 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 60 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61 * simply involves destroying the MKL sparse matrix handle and then freeing 62 * the spptr pointer. */ 63 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 64 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 65 66 if (aijmkl->sparse_optimized) { 67 sparse_status_t stat; 68 stat = mkl_sparse_destroy(aijmkl->csrA); 69 if (stat != SPARSE_STATUS_SUCCESS) { 70 PetscFunctionReturn(PETSC_ERR_LIB); 71 } 72 } 73 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 74 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 75 76 /* Change the type of B to MATSEQAIJ. */ 77 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 78 79 *newmat = B; 80 PetscFunctionReturn(0); 81 } 82 83 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 84 { 85 PetscErrorCode ierr; 86 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 87 88 PetscFunctionBegin; 89 90 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 91 * spptr pointer. */ 92 if (aijmkl) { 93 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 94 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 95 if (aijmkl->sparse_optimized) { 96 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 97 stat = mkl_sparse_destroy(aijmkl->csrA); 98 if (stat != SPARSE_STATUS_SUCCESS) { 99 PetscFunctionReturn(PETSC_ERR_LIB); 100 } 101 } 102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 103 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 104 } 105 106 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 107 * to destroy everything that remains. */ 108 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 109 /* Note that I don't call MatSetType(). I believe this is because that 110 * is only to be called when *building* a matrix. I could be wrong, but 111 * that is how things work for the SuperLU matrix class. */ 112 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 117 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 118 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 119 * handle, creates a new one, and then calls mkl_sparse_optimize(). 120 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 121 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 122 * an unoptimized matrix handle here. */ 123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 124 { 125 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 126 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 127 * does nothing. We make it callable anyway in this case because it cuts 128 * down on littering the code with #ifdefs. */ 129 PetscFunctionReturn(0); 130 #else 131 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 132 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 133 PetscInt m,n; 134 MatScalar *aa; 135 PetscInt *aj,*ai; 136 sparse_status_t stat; 137 138 PetscFunctionBegin; 139 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 140 141 if (aijmkl->sparse_optimized) { 142 /* Matrix has been previously assembled and optimized. Must destroy old 143 * matrix handle before running the optimization step again. */ 144 stat = mkl_sparse_destroy(aijmkl->csrA); 145 if (stat != SPARSE_STATUS_SUCCESS) { 146 PetscFunctionReturn(PETSC_ERR_LIB); 147 } 148 } 149 aijmkl->sparse_optimized = PETSC_FALSE; 150 151 /* Now perform the SpMV2 setup and matrix optimization. */ 152 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 153 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 154 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 155 m = A->rmap->n; 156 n = A->cmap->n; 157 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 158 aa = a->a; /* Nonzero elements stored row-by-row. */ 159 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 160 if ((a->nz!=0) & !(A->structure_only)) { 161 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 162 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 163 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 164 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 165 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 166 stat = mkl_sparse_optimize(aijmkl->csrA); 167 if (stat != SPARSE_STATUS_SUCCESS) { 168 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 169 PetscFunctionReturn(PETSC_ERR_LIB); 170 } 171 aijmkl->sparse_optimized = PETSC_TRUE; 172 } 173 174 PetscFunctionReturn(0); 175 #endif 176 } 177 178 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 179 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 180 * matrix handle. */ 181 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 182 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,Mat *mat) 183 { 184 PetscErrorCode ierr; 185 sparse_status_t stat; 186 sparse_index_base_t indexing; 187 PetscInt nrows, ncols; 188 PetscInt *aj,*ai; 189 MatScalar *aa; 190 Mat A; 191 Mat_SeqAIJMKL *aijmkl; 192 193 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,NULL,&aj,&aa); 194 if (stat != SPARSE_STATUS_SUCCESS) { 195 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 196 PetscFunctionReturn(PETSC_ERR_LIB); 197 } 198 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 199 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 200 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 201 202 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 203 * Now turn it into a MATSEQAIJMKL. */ 204 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 205 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 206 aijmkl->csrA = csrA; 207 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 208 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 209 * and just need to be able to run the MKL optimization step. */ 210 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 211 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 212 stat = mkl_sparse_optimize(aijmkl->csrA); 213 if (stat != SPARSE_STATUS_SUCCESS) { 214 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 215 PetscFunctionReturn(PETSC_ERR_LIB); 216 } 217 aijmkl->sparse_optimized = PETSC_TRUE; 218 219 *mat = A; 220 PetscFunctionReturn(0); 221 } 222 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 223 224 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 225 { 226 PetscErrorCode ierr; 227 Mat_SeqAIJMKL *aijmkl; 228 Mat_SeqAIJMKL *aijmkl_dest; 229 230 PetscFunctionBegin; 231 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 232 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 233 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 234 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 235 aijmkl_dest->sparse_optimized = PETSC_FALSE; 236 if (aijmkl->eager_inspection) { 237 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 238 } 239 PetscFunctionReturn(0); 240 } 241 242 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 243 { 244 PetscErrorCode ierr; 245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 246 Mat_SeqAIJMKL *aijmkl; 247 248 PetscFunctionBegin; 249 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 250 251 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 252 * extra information and some different methods, call the AssemblyEnd 253 * routine for a MATSEQAIJ. 254 * I'm not sure if this is the best way to do this, but it avoids 255 * a lot of code duplication. */ 256 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 257 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 258 259 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 260 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 261 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 262 if (aijmkl->eager_inspection) { 263 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 264 } 265 266 PetscFunctionReturn(0); 267 } 268 269 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 272 const PetscScalar *x; 273 PetscScalar *y; 274 const MatScalar *aa; 275 PetscErrorCode ierr; 276 PetscInt m=A->rmap->n; 277 PetscInt n=A->cmap->n; 278 PetscScalar alpha = 1.0; 279 PetscScalar beta = 0.0; 280 const PetscInt *aj,*ai; 281 char matdescra[6]; 282 283 284 /* Variables not in MatMult_SeqAIJ. */ 285 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 286 287 PetscFunctionBegin; 288 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 289 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 290 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 291 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 292 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 293 aa = a->a; /* Nonzero elements stored row-by-row. */ 294 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 295 296 /* Call MKL sparse BLAS routine to do the MatMult. */ 297 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 298 299 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 300 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 301 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 306 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 307 { 308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 309 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 310 const PetscScalar *x; 311 PetscScalar *y; 312 PetscErrorCode ierr; 313 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 314 315 PetscFunctionBegin; 316 317 /* If there are no nonzero entries, zero yy and return immediately. */ 318 if(!a->nz) { 319 PetscInt i; 320 PetscInt m=A->rmap->n; 321 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 322 for (i=0; i<m; i++) { 323 y[i] = 0.0; 324 } 325 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 330 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 331 332 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 333 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 334 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 335 if (!aijmkl->sparse_optimized) { 336 MatSeqAIJMKL_create_mkl_handle(A); 337 } 338 339 /* Call MKL SpMV2 executor routine to do the MatMult. */ 340 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 341 342 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 343 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 344 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 345 if (stat != SPARSE_STATUS_SUCCESS) { 346 PetscFunctionReturn(PETSC_ERR_LIB); 347 } 348 PetscFunctionReturn(0); 349 } 350 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 351 352 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 353 { 354 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 355 const PetscScalar *x; 356 PetscScalar *y; 357 const MatScalar *aa; 358 PetscErrorCode ierr; 359 PetscInt m=A->rmap->n; 360 PetscInt n=A->cmap->n; 361 PetscScalar alpha = 1.0; 362 PetscScalar beta = 0.0; 363 const PetscInt *aj,*ai; 364 char matdescra[6]; 365 366 /* Variables not in MatMultTranspose_SeqAIJ. */ 367 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 368 369 PetscFunctionBegin; 370 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 371 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 372 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 373 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 374 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 375 aa = a->a; /* Nonzero elements stored row-by-row. */ 376 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 377 378 /* Call MKL sparse BLAS routine to do the MatMult. */ 379 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 380 381 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 382 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 388 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 389 { 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 391 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 392 const PetscScalar *x; 393 PetscScalar *y; 394 PetscErrorCode ierr; 395 sparse_status_t stat; 396 397 PetscFunctionBegin; 398 399 /* If there are no nonzero entries, zero yy and return immediately. */ 400 if(!a->nz) { 401 PetscInt i; 402 PetscInt n=A->cmap->n; 403 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 404 for (i=0; i<n; i++) { 405 y[i] = 0.0; 406 } 407 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 412 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 413 414 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 415 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 416 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 417 if (!aijmkl->sparse_optimized) { 418 MatSeqAIJMKL_create_mkl_handle(A); 419 } 420 421 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 422 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 423 424 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 426 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 427 if (stat != SPARSE_STATUS_SUCCESS) { 428 PetscFunctionReturn(PETSC_ERR_LIB); 429 } 430 PetscFunctionReturn(0); 431 } 432 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 433 434 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 435 { 436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 437 const PetscScalar *x; 438 PetscScalar *y,*z; 439 const MatScalar *aa; 440 PetscErrorCode ierr; 441 PetscInt m=A->rmap->n; 442 PetscInt n=A->cmap->n; 443 const PetscInt *aj,*ai; 444 PetscInt i; 445 446 /* Variables not in MatMultAdd_SeqAIJ. */ 447 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 448 PetscScalar alpha = 1.0; 449 PetscScalar beta; 450 char matdescra[6]; 451 452 PetscFunctionBegin; 453 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 454 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 455 456 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 457 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 458 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 459 aa = a->a; /* Nonzero elements stored row-by-row. */ 460 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 461 462 /* Call MKL sparse BLAS routine to do the MatMult. */ 463 if (zz == yy) { 464 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 465 beta = 1.0; 466 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 467 } else { 468 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 469 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 470 beta = 0.0; 471 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 472 for (i=0; i<m; i++) { 473 z[i] += y[i]; 474 } 475 } 476 477 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 479 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 484 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 487 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 488 const PetscScalar *x; 489 PetscScalar *y,*z; 490 PetscErrorCode ierr; 491 PetscInt m=A->rmap->n; 492 PetscInt i; 493 494 /* Variables not in MatMultAdd_SeqAIJ. */ 495 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 496 497 PetscFunctionBegin; 498 499 /* If there are no nonzero entries, set zz = yy and return immediately. */ 500 if(!a->nz) { 501 PetscInt i; 502 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 503 for (i=0; i<m; i++) { 504 z[i] = y[i]; 505 } 506 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 511 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 512 513 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 514 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 515 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 516 if (!aijmkl->sparse_optimized) { 517 MatSeqAIJMKL_create_mkl_handle(A); 518 } 519 520 /* Call MKL sparse BLAS routine to do the MatMult. */ 521 if (zz == yy) { 522 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 523 * with alpha and beta both set to 1.0. */ 524 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 525 } else { 526 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 527 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 528 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 529 for (i=0; i<m; i++) { 530 z[i] += y[i]; 531 } 532 } 533 534 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 536 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 537 if (stat != SPARSE_STATUS_SUCCESS) { 538 PetscFunctionReturn(PETSC_ERR_LIB); 539 } 540 PetscFunctionReturn(0); 541 } 542 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 543 544 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 545 { 546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 547 const PetscScalar *x; 548 PetscScalar *y,*z; 549 const MatScalar *aa; 550 PetscErrorCode ierr; 551 PetscInt m=A->rmap->n; 552 PetscInt n=A->cmap->n; 553 const PetscInt *aj,*ai; 554 PetscInt i; 555 556 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 557 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 558 PetscScalar alpha = 1.0; 559 PetscScalar beta; 560 char matdescra[6]; 561 562 PetscFunctionBegin; 563 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 564 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 565 566 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 567 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 568 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 569 aa = a->a; /* Nonzero elements stored row-by-row. */ 570 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 571 572 /* Call MKL sparse BLAS routine to do the MatMult. */ 573 if (zz == yy) { 574 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 575 beta = 1.0; 576 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 577 } else { 578 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 579 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 580 beta = 0.0; 581 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 582 for (i=0; i<n; i++) { 583 z[i] += y[i]; 584 } 585 } 586 587 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 588 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 594 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 598 const PetscScalar *x; 599 PetscScalar *y,*z; 600 PetscErrorCode ierr; 601 PetscInt n=A->cmap->n; 602 PetscInt i; 603 604 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 605 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 606 607 PetscFunctionBegin; 608 609 /* If there are no nonzero entries, set zz = yy and return immediately. */ 610 if(!a->nz) { 611 PetscInt i; 612 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 613 for (i=0; i<n; i++) { 614 z[i] = y[i]; 615 } 616 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 621 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 622 623 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 624 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 625 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 626 if (!aijmkl->sparse_optimized) { 627 MatSeqAIJMKL_create_mkl_handle(A); 628 } 629 630 /* Call MKL sparse BLAS routine to do the MatMult. */ 631 if (zz == yy) { 632 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 633 * with alpha and beta both set to 1.0. */ 634 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 635 } else { 636 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 637 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 638 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 639 for (i=0; i<n; i++) { 640 z[i] += y[i]; 641 } 642 } 643 644 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 645 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 646 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 647 if (stat != SPARSE_STATUS_SUCCESS) { 648 PetscFunctionReturn(PETSC_ERR_LIB); 649 } 650 PetscFunctionReturn(0); 651 } 652 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 653 654 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 655 { 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 660 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 665 { 666 PetscErrorCode ierr; 667 668 PetscFunctionBegin; 669 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 670 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 675 { 676 PetscErrorCode ierr; 677 678 PetscFunctionBegin; 679 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 680 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 685 { 686 PetscErrorCode ierr; 687 688 PetscFunctionBegin; 689 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 690 if (str == SAME_NONZERO_PATTERN) { 691 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 692 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 693 } 694 PetscFunctionReturn(0); 695 } 696 697 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 698 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 699 * routine, but can also be used to convert an assembled SeqAIJ matrix 700 * into a SeqAIJMKL one. */ 701 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 702 { 703 PetscErrorCode ierr; 704 Mat B = *newmat; 705 Mat_SeqAIJMKL *aijmkl; 706 PetscBool set; 707 PetscBool sametype; 708 709 PetscFunctionBegin; 710 if (reuse == MAT_INITIAL_MATRIX) { 711 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 712 } 713 714 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 715 if (sametype) PetscFunctionReturn(0); 716 717 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 718 B->spptr = (void*) aijmkl; 719 720 /* Set function pointers for methods that we inherit from AIJ but override. 721 * We also parse some command line options below, since those determine some of the methods we point to. */ 722 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 723 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 724 B->ops->destroy = MatDestroy_SeqAIJMKL; 725 726 aijmkl->sparse_optimized = PETSC_FALSE; 727 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 728 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 729 #else 730 aijmkl->no_SpMV2 = PETSC_TRUE; 731 #endif 732 aijmkl->eager_inspection = PETSC_FALSE; 733 734 /* Parse command line options. */ 735 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 736 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 737 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 738 ierr = PetscOptionsEnd();CHKERRQ(ierr); 739 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 740 if(!aijmkl->no_SpMV2) { 741 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 742 aijmkl->no_SpMV2 = PETSC_TRUE; 743 } 744 #endif 745 746 if(!aijmkl->no_SpMV2) { 747 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 748 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 749 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 750 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 751 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 752 #endif 753 } else { 754 B->ops->mult = MatMult_SeqAIJMKL; 755 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 756 B->ops->multadd = MatMultAdd_SeqAIJMKL; 757 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 758 } 759 760 B->ops->scale = MatScale_SeqAIJMKL; 761 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 762 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 763 B->ops->axpy = MatAXPY_SeqAIJMKL; 764 765 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 766 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 768 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 769 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 770 771 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 772 *newmat = B; 773 PetscFunctionReturn(0); 774 } 775 776 /*@C 777 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 778 This type inherits from AIJ and is largely identical, but uses sparse BLAS 779 routines from Intel MKL whenever possible. 780 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 781 operations are currently supported. 782 If the installed version of MKL supports the "SpMV2" sparse 783 inspector-executor routines, then those are used by default. 784 785 Collective on MPI_Comm 786 787 Input Parameters: 788 + comm - MPI communicator, set to PETSC_COMM_SELF 789 . m - number of rows 790 . n - number of columns 791 . nz - number of nonzeros per row (same for all rows) 792 - nnz - array containing the number of nonzeros in the various rows 793 (possibly different for each row) or NULL 794 795 Output Parameter: 796 . A - the matrix 797 798 Options Database Keys: 799 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 800 801 Notes: 802 If nnz is given then nz is ignored 803 804 Level: intermediate 805 806 .keywords: matrix, MKL, sparse, parallel 807 808 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 809 @*/ 810 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 811 { 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 ierr = MatCreate(comm,A);CHKERRQ(ierr); 816 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 817 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 818 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 823 { 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 828 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 829 PetscFunctionReturn(0); 830 } 831