1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 53 B->ops->scale = MatScale_SeqAIJ; 54 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 55 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 56 B->ops->axpy = MatAXPY_SeqAIJ; 57 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 63 if(!aijmkl->no_SpMV2) { 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 } 67 68 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69 * simply involves destroying the MKL sparse matrix handle and then freeing 70 * the spptr pointer. */ 71 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 72 73 if (aijmkl->sparse_optimized) { 74 sparse_status_t stat; 75 stat = mkl_sparse_destroy(aijmkl->csrA); 76 if (stat != SPARSE_STATUS_SUCCESS) { 77 PetscFunctionReturn(PETSC_ERR_LIB); 78 } 79 } 80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 82 83 /* Change the type of B to MATSEQAIJ. */ 84 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 85 86 *newmat = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 95 PetscFunctionBegin; 96 97 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98 * spptr pointer. */ 99 if (aijmkl) { 100 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 101 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 102 if (aijmkl->sparse_optimized) { 103 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 104 stat = mkl_sparse_destroy(aijmkl->csrA); 105 if (stat != SPARSE_STATUS_SUCCESS) { 106 PetscFunctionReturn(PETSC_ERR_LIB); 107 } 108 } 109 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 110 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 111 } 112 113 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 114 * to destroy everything that remains. */ 115 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 116 /* Note that I don't call MatSetType(). I believe this is because that 117 * is only to be called when *building* a matrix. I could be wrong, but 118 * that is how things work for the SuperLU matrix class. */ 119 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 124 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 125 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 126 * handle, creates a new one, and then calls mkl_sparse_optimize(). 127 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 128 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 129 * an unoptimized matrix handle here. */ 130 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 131 { 132 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 133 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 134 * does nothing. We make it callable anyway in this case because it cuts 135 * down on littering the code with #ifdefs. */ 136 PetscFunctionBegin; 137 PetscFunctionReturn(0); 138 #else 139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 140 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 141 PetscInt m,n; 142 MatScalar *aa; 143 PetscInt *aj,*ai; 144 sparse_status_t stat; 145 PetscErrorCode ierr; 146 PetscObjectState state; 147 148 PetscFunctionBegin; 149 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 150 151 if (aijmkl->sparse_optimized) { 152 /* Matrix has been previously assembled and optimized. Must destroy old 153 * matrix handle before running the optimization step again. */ 154 stat = mkl_sparse_destroy(aijmkl->csrA); 155 if (stat != SPARSE_STATUS_SUCCESS) { 156 PetscFunctionReturn(PETSC_ERR_LIB); 157 } 158 } 159 aijmkl->sparse_optimized = PETSC_FALSE; 160 161 /* Now perform the SpMV2 setup and matrix optimization. */ 162 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 163 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 164 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 165 m = A->rmap->n; 166 n = A->cmap->n; 167 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 168 aa = a->a; /* Nonzero elements stored row-by-row. */ 169 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 170 if ((a->nz!=0) & !(A->structure_only)) { 171 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 172 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 173 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 174 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 175 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 176 stat = mkl_sparse_optimize(aijmkl->csrA); 177 if (stat != SPARSE_STATUS_SUCCESS) { 178 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 179 PetscFunctionReturn(PETSC_ERR_LIB); 180 } 181 aijmkl->sparse_optimized = PETSC_TRUE; 182 } 183 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 184 185 PetscFunctionReturn(0); 186 #endif 187 } 188 189 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 190 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 191 * matrix handle. 192 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 193 * there is no good alternative. */ 194 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 195 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 196 { 197 PetscErrorCode ierr; 198 sparse_status_t stat; 199 sparse_index_base_t indexing; 200 PetscInt nrows, ncols; 201 PetscInt *aj,*ai,*dummy; 202 MatScalar *aa; 203 Mat A; 204 Mat_SeqAIJ *a; 205 Mat_SeqAIJMKL *aijmkl; 206 207 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 208 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 209 if (stat != SPARSE_STATUS_SUCCESS) { 210 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 211 PetscFunctionReturn(PETSC_ERR_LIB); 212 } 213 214 if (reuse == MAT_REUSE_MATRIX) { 215 ierr = MatDestroy(mat);CHKERRQ(ierr); 216 } 217 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 218 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 219 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 220 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 221 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 222 * they will be destroyed when the MKL handle is destroyed. 223 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 224 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 225 226 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 227 * Now turn it into a MATSEQAIJMKL. */ 228 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 229 230 a = (Mat_SeqAIJ*)A->data; 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 aijmkl->csrA = csrA; 233 234 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 235 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 236 * and just need to be able to run the MKL optimization step. */ 237 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 240 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 241 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 242 stat = mkl_sparse_optimize(aijmkl->csrA); 243 if (stat != SPARSE_STATUS_SUCCESS) { 244 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 245 PetscFunctionReturn(PETSC_ERR_LIB); 246 } 247 aijmkl->sparse_optimized = PETSC_TRUE; 248 249 *mat = A; 250 PetscFunctionReturn(0); 251 } 252 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 253 254 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 255 { 256 PetscErrorCode ierr; 257 Mat_SeqAIJMKL *aijmkl; 258 Mat_SeqAIJMKL *aijmkl_dest; 259 260 PetscFunctionBegin; 261 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 262 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 263 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 264 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 265 aijmkl_dest->sparse_optimized = PETSC_FALSE; 266 if (aijmkl->eager_inspection) { 267 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 268 } 269 PetscFunctionReturn(0); 270 } 271 272 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 273 { 274 PetscErrorCode ierr; 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 276 Mat_SeqAIJMKL *aijmkl; 277 278 PetscFunctionBegin; 279 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 280 281 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 282 * extra information and some different methods, call the AssemblyEnd 283 * routine for a MATSEQAIJ. 284 * I'm not sure if this is the best way to do this, but it avoids 285 * a lot of code duplication. */ 286 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 287 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 288 289 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 290 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 291 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 292 if (aijmkl->eager_inspection) { 293 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 294 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 295 } else if (aijmkl->sparse_optimized) { 296 /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 297 * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 298 * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 299 * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 300 sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 301 if (stat != SPARSE_STATUS_SUCCESS) { 302 PetscFunctionReturn(PETSC_ERR_LIB); 303 } 304 aijmkl->sparse_optimized = PETSC_FALSE; 305 #endif 306 } 307 308 PetscFunctionReturn(0); 309 } 310 311 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 312 { 313 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 314 const PetscScalar *x; 315 PetscScalar *y; 316 const MatScalar *aa; 317 PetscErrorCode ierr; 318 PetscInt m=A->rmap->n; 319 PetscInt n=A->cmap->n; 320 PetscScalar alpha = 1.0; 321 PetscScalar beta = 0.0; 322 const PetscInt *aj,*ai; 323 char matdescra[6]; 324 325 326 /* Variables not in MatMult_SeqAIJ. */ 327 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 328 329 PetscFunctionBegin; 330 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 331 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 332 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 333 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 334 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 335 aa = a->a; /* Nonzero elements stored row-by-row. */ 336 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 337 338 /* Call MKL sparse BLAS routine to do the MatMult. */ 339 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 340 341 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 342 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 348 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 349 { 350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 351 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 352 const PetscScalar *x; 353 PetscScalar *y; 354 PetscErrorCode ierr; 355 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 356 PetscObjectState state; 357 358 PetscFunctionBegin; 359 360 /* If there are no nonzero entries, zero yy and return immediately. */ 361 if(!a->nz) { 362 PetscInt i; 363 PetscInt m=A->rmap->n; 364 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 365 for (i=0; i<m; i++) { 366 y[i] = 0.0; 367 } 368 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 373 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 374 375 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 376 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 377 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 378 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 379 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 380 MatSeqAIJMKL_create_mkl_handle(A); 381 } 382 383 /* Call MKL SpMV2 executor routine to do the MatMult. */ 384 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 385 386 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 387 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 388 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 389 if (stat != SPARSE_STATUS_SUCCESS) { 390 PetscFunctionReturn(PETSC_ERR_LIB); 391 } 392 PetscFunctionReturn(0); 393 } 394 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 395 396 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 397 { 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 399 const PetscScalar *x; 400 PetscScalar *y; 401 const MatScalar *aa; 402 PetscErrorCode ierr; 403 PetscInt m=A->rmap->n; 404 PetscInt n=A->cmap->n; 405 PetscScalar alpha = 1.0; 406 PetscScalar beta = 0.0; 407 const PetscInt *aj,*ai; 408 char matdescra[6]; 409 410 /* Variables not in MatMultTranspose_SeqAIJ. */ 411 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 412 413 PetscFunctionBegin; 414 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 415 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 416 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 417 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 418 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 419 aa = a->a; /* Nonzero elements stored row-by-row. */ 420 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 421 422 /* Call MKL sparse BLAS routine to do the MatMult. */ 423 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 424 425 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 427 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 432 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 433 { 434 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 435 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 436 const PetscScalar *x; 437 PetscScalar *y; 438 PetscErrorCode ierr; 439 sparse_status_t stat; 440 PetscObjectState state; 441 442 PetscFunctionBegin; 443 444 /* If there are no nonzero entries, zero yy and return immediately. */ 445 if(!a->nz) { 446 PetscInt i; 447 PetscInt n=A->cmap->n; 448 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 449 for (i=0; i<n; i++) { 450 y[i] = 0.0; 451 } 452 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 457 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 458 459 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 460 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 461 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 462 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 463 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 464 MatSeqAIJMKL_create_mkl_handle(A); 465 } 466 467 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 468 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 469 470 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 471 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 473 if (stat != SPARSE_STATUS_SUCCESS) { 474 PetscFunctionReturn(PETSC_ERR_LIB); 475 } 476 PetscFunctionReturn(0); 477 } 478 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 479 480 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 483 const PetscScalar *x; 484 PetscScalar *y,*z; 485 const MatScalar *aa; 486 PetscErrorCode ierr; 487 PetscInt m=A->rmap->n; 488 PetscInt n=A->cmap->n; 489 const PetscInt *aj,*ai; 490 PetscInt i; 491 492 /* Variables not in MatMultAdd_SeqAIJ. */ 493 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 494 PetscScalar alpha = 1.0; 495 PetscScalar beta; 496 char matdescra[6]; 497 498 PetscFunctionBegin; 499 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 500 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 501 502 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 503 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 504 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 505 aa = a->a; /* Nonzero elements stored row-by-row. */ 506 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 507 508 /* Call MKL sparse BLAS routine to do the MatMult. */ 509 if (zz == yy) { 510 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 511 beta = 1.0; 512 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 513 } else { 514 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 515 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 516 beta = 0.0; 517 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 518 for (i=0; i<m; i++) { 519 z[i] += y[i]; 520 } 521 } 522 523 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 524 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 525 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 530 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 533 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 534 const PetscScalar *x; 535 PetscScalar *y,*z; 536 PetscErrorCode ierr; 537 PetscInt m=A->rmap->n; 538 PetscInt i; 539 540 /* Variables not in MatMultAdd_SeqAIJ. */ 541 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 542 PetscObjectState state; 543 544 PetscFunctionBegin; 545 546 /* If there are no nonzero entries, set zz = yy and return immediately. */ 547 if(!a->nz) { 548 PetscInt i; 549 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 550 for (i=0; i<m; i++) { 551 z[i] = y[i]; 552 } 553 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 559 560 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 561 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 562 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 563 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 564 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 565 MatSeqAIJMKL_create_mkl_handle(A); 566 } 567 568 /* Call MKL sparse BLAS routine to do the MatMult. */ 569 if (zz == yy) { 570 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 571 * with alpha and beta both set to 1.0. */ 572 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 573 } else { 574 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 575 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 576 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 577 for (i=0; i<m; i++) { 578 z[i] += y[i]; 579 } 580 } 581 582 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 584 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 585 if (stat != SPARSE_STATUS_SUCCESS) { 586 PetscFunctionReturn(PETSC_ERR_LIB); 587 } 588 PetscFunctionReturn(0); 589 } 590 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 591 592 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 const PetscScalar *x; 596 PetscScalar *y,*z; 597 const MatScalar *aa; 598 PetscErrorCode ierr; 599 PetscInt m=A->rmap->n; 600 PetscInt n=A->cmap->n; 601 const PetscInt *aj,*ai; 602 PetscInt i; 603 604 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 605 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 606 PetscScalar alpha = 1.0; 607 PetscScalar beta; 608 char matdescra[6]; 609 610 PetscFunctionBegin; 611 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 612 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 613 614 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 615 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 616 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 617 aa = a->a; /* Nonzero elements stored row-by-row. */ 618 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 619 620 /* Call MKL sparse BLAS routine to do the MatMult. */ 621 if (zz == yy) { 622 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 623 beta = 1.0; 624 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 625 } else { 626 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 627 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 628 beta = 0.0; 629 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 630 for (i=0; i<n; i++) { 631 z[i] += y[i]; 632 } 633 } 634 635 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 636 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 637 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 642 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 645 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 646 const PetscScalar *x; 647 PetscScalar *y,*z; 648 PetscErrorCode ierr; 649 PetscInt n=A->cmap->n; 650 PetscInt i; 651 PetscObjectState state; 652 653 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 654 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 655 656 PetscFunctionBegin; 657 658 /* If there are no nonzero entries, set zz = yy and return immediately. */ 659 if(!a->nz) { 660 PetscInt i; 661 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 662 for (i=0; i<n; i++) { 663 z[i] = y[i]; 664 } 665 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 670 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 671 672 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 673 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 674 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 675 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 676 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 677 MatSeqAIJMKL_create_mkl_handle(A); 678 } 679 680 /* Call MKL sparse BLAS routine to do the MatMult. */ 681 if (zz == yy) { 682 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 683 * with alpha and beta both set to 1.0. */ 684 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 685 } else { 686 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 687 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 688 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 689 for (i=0; i<n; i++) { 690 z[i] += y[i]; 691 } 692 } 693 694 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 695 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 696 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 697 if (stat != SPARSE_STATUS_SUCCESS) { 698 PetscFunctionReturn(PETSC_ERR_LIB); 699 } 700 PetscFunctionReturn(0); 701 } 702 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 703 704 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 705 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 706 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 707 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 708 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 709 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 710 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 711 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 712 { 713 Mat_SeqAIJMKL *a, *b; 714 sparse_matrix_t csrA, csrB, csrC; 715 PetscErrorCode ierr; 716 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 717 PetscObjectState state; 718 719 PetscFunctionBegin; 720 a = (Mat_SeqAIJMKL*)A->spptr; 721 b = (Mat_SeqAIJMKL*)B->spptr; 722 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 723 if (!a->sparse_optimized || a->state != state) { 724 MatSeqAIJMKL_create_mkl_handle(A); 725 } 726 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 727 if (!b->sparse_optimized || b->state != state) { 728 MatSeqAIJMKL_create_mkl_handle(B); 729 } 730 csrA = a->csrA; 731 csrB = b->csrA; 732 733 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 734 if (stat != SPARSE_STATUS_SUCCESS) { 735 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 736 PetscFunctionReturn(PETSC_ERR_LIB); 737 } 738 739 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 740 741 PetscFunctionReturn(0); 742 } 743 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 744 745 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 746 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 747 { 748 Mat_SeqAIJMKL *a, *b; 749 sparse_matrix_t csrA, csrB, csrC; 750 PetscErrorCode ierr; 751 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 752 PetscObjectState state; 753 754 PetscFunctionBegin; 755 a = (Mat_SeqAIJMKL*)A->spptr; 756 b = (Mat_SeqAIJMKL*)B->spptr; 757 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 758 if (!a->sparse_optimized || a->state != state) { 759 MatSeqAIJMKL_create_mkl_handle(A); 760 } 761 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 762 if (!b->sparse_optimized || b->state != state) { 763 MatSeqAIJMKL_create_mkl_handle(B); 764 } 765 csrA = a->csrA; 766 csrB = b->csrA; 767 768 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 769 if (stat != SPARSE_STATUS_SUCCESS) { 770 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 771 PetscFunctionReturn(PETSC_ERR_LIB); 772 } 773 774 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 775 776 PetscFunctionReturn(0); 777 } 778 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 779 780 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 781 { 782 PetscErrorCode ierr; 783 784 PetscFunctionBegin; 785 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 786 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 791 { 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 796 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 800 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 801 { 802 PetscErrorCode ierr; 803 804 PetscFunctionBegin; 805 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 806 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 811 { 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 816 if (str == SAME_NONZERO_PATTERN) { 817 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 818 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 824 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 825 * routine, but can also be used to convert an assembled SeqAIJ matrix 826 * into a SeqAIJMKL one. */ 827 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 828 { 829 PetscErrorCode ierr; 830 Mat B = *newmat; 831 Mat_SeqAIJMKL *aijmkl; 832 PetscBool set; 833 PetscBool sametype; 834 835 PetscFunctionBegin; 836 if (reuse == MAT_INITIAL_MATRIX) { 837 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 838 } 839 840 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 841 if (sametype) PetscFunctionReturn(0); 842 843 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 844 B->spptr = (void*) aijmkl; 845 846 /* Set function pointers for methods that we inherit from AIJ but override. 847 * We also parse some command line options below, since those determine some of the methods we point to. */ 848 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 849 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 850 B->ops->destroy = MatDestroy_SeqAIJMKL; 851 852 aijmkl->sparse_optimized = PETSC_FALSE; 853 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 854 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 855 #else 856 aijmkl->no_SpMV2 = PETSC_TRUE; 857 #endif 858 aijmkl->eager_inspection = PETSC_FALSE; 859 860 /* Parse command line options. */ 861 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 862 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 863 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 864 ierr = PetscOptionsEnd();CHKERRQ(ierr); 865 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 866 if(!aijmkl->no_SpMV2) { 867 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 868 aijmkl->no_SpMV2 = PETSC_TRUE; 869 } 870 #endif 871 872 if(!aijmkl->no_SpMV2) { 873 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 874 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 875 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 876 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 877 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 878 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 879 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 880 #endif 881 } else { 882 B->ops->mult = MatMult_SeqAIJMKL; 883 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 884 B->ops->multadd = MatMultAdd_SeqAIJMKL; 885 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 886 } 887 888 B->ops->scale = MatScale_SeqAIJMKL; 889 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 890 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 891 B->ops->axpy = MatAXPY_SeqAIJMKL; 892 893 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 894 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 896 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 897 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 898 if(!aijmkl->no_SpMV2) { 899 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 900 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 902 #endif 903 } 904 905 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 906 *newmat = B; 907 PetscFunctionReturn(0); 908 } 909 910 /*@C 911 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 912 This type inherits from AIJ and is largely identical, but uses sparse BLAS 913 routines from Intel MKL whenever possible. 914 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 915 operations are currently supported. 916 If the installed version of MKL supports the "SpMV2" sparse 917 inspector-executor routines, then those are used by default. 918 919 Collective on MPI_Comm 920 921 Input Parameters: 922 + comm - MPI communicator, set to PETSC_COMM_SELF 923 . m - number of rows 924 . n - number of columns 925 . nz - number of nonzeros per row (same for all rows) 926 - nnz - array containing the number of nonzeros in the various rows 927 (possibly different for each row) or NULL 928 929 Output Parameter: 930 . A - the matrix 931 932 Options Database Keys: 933 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 934 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 935 936 Notes: 937 If nnz is given then nz is ignored 938 939 Level: intermediate 940 941 .keywords: matrix, MKL, sparse, parallel 942 943 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 944 @*/ 945 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 946 { 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = MatCreate(comm,A);CHKERRQ(ierr); 951 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 952 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 953 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 958 { 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 963 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } 966