/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */

typedef struct {
  PetscBool        no_SpMV2;          /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection;  /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized;  /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;             /* Object state of the matrix when the MKL handle was last built; used to detect a stale handle. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA;           /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  PetscErrorCode ierr;
  Mat            B = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
  B->ops->scale            = MatScale_SeqAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
  B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
  B->ops->axpy             = MatAXPY_SeqAIJ;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  ierr = PetscFree(B->spptr);CHKERRQ(ierr);

  /* Change the type of B to MATSEQAIJ. */
  ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);

  *newmat = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that we do not call MatSetType() here; that routine is only meant to be
   * used while *building* a matrix, and this mirrors how the SuperLU matrix class
   * handles destruction. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ       *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;
  PetscObjectState state;

  PetscFunctionBegin;
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
    aijmkl->sparse_optimized = PETSC_TRUE;
  }
  /* Record the object state at the time the handle was built so that the multiply routines can detect a stale handle. */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  aijmkl->state = state;

  PetscFunctionReturn(0);
#endif
}

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  PetscFunctionBegin;
  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatDestroy(mat);CHKERRQ(ierr);
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  aijmkl       = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  stat = mkl_sparse_optimize(aijmkl->csrA);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
  aijmkl->sparse_optimized = PETSC_TRUE;

  *mat = A;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  PetscFunctionBegin;
  ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    /* Build the MKL handle for the newly duplicated matrix; A already has one if eager inspection is in use. */
    ierr = MatSeqAIJMKL_create_mkl_handle(*M);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  } else if (aijmkl->sparse_optimized) {
    /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
     * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
     * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
     * (The SeqAIJ version of MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
    sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    aijmkl->sparse_optimized = PETSC_FALSE;
#endif
  }

  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult; mkl_xcsrmv expands to the
   * precision-appropriate MKL csrmv routine (e.g., mkl_dcsrmv for double precision). */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed.
   * Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used.
 * Even though this means that the (hopefully more optimized) MKL routines do not get used, this probably is best
 * because the MKL routines would waste time re-computing the symbolic portion, whereas the native PETSc SeqAIJ
 * routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
  if (!b->sparse_optimized || b->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(B);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
  if (!b->sparse_optimized || b->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(B);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
  ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
  ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
  ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
  if (str == SAME_NONZERO_PATTERN) {
    /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
    ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
  if (sametype) PetscFunctionReturn(0);

  ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of the SpMV2 inspector-executor routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Perform MKL inspection at assembly time instead of lazily","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");CHKERRQ(ierr);
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
    B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
    B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif
  } else {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }

  B->ops->scale         = MatScale_SeqAIJMKL;
  B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL;
  B->ops->diagonalset   = MatDiagonalSet_SeqAIJMKL;
  B->ops->axpy          = MatAXPY_SeqAIJMKL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
#endif
  }

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
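
/* ---------------------------------------------------------------------------
 * Illustrative usage sketch (added for documentation only, not part of the
 * implementation above): one plausible way an application could create and
 * apply a MATSEQAIJMKL matrix, and how the options database keys handled in
 * MatConvert_SeqAIJ_SeqAIJMKL() come into play. The size 'n', the per-row
 * nonzero estimate, and the vectors 'x' and 'y' are hypothetical example
 * values/objects, not names used elsewhere in this file.
 *
 *   Mat            A;
 *   Vec            x,y;              // assumed created with VecCreateSeq() and filled elsewhere
 *   PetscErrorCode ierr;
 *   PetscInt       n = 100;          // hypothetical matrix dimension
 *
 *   ierr = MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,5,NULL,&A);CHKERRQ(ierr);
 *   // ... MatSetValues() calls to fill the matrix ...
 *   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 *   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  // with -mat_aijmkl_eager_inspection the MKL handle is built here
 *   ierr = MatMult(A,x,y);CHKERRQ(ierr);                        // otherwise it is built lazily on first use
 *
 * An existing MATSEQAIJ matrix can instead be switched at runtime with
 * MatSetType(A,MATSEQAIJMKL) or the command-line option -mat_type seqaijmkl;
 * -mat_aijmkl_no_spmv2 falls back to the non-inspector-executor kernels.
 * ------------------------------------------------------------------------- */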