1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. 
*/ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 52 B->ops->scale = MatScale_SeqAIJ; 53 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 54 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 55 B->ops->axpy = MatAXPY_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 } 66 67 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 68 * simply involves destroying the MKL sparse matrix handle and then freeing 69 * the spptr pointer. */ 70 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 71 72 if (aijmkl->sparse_optimized) { 73 sparse_status_t stat; 74 stat = mkl_sparse_destroy(aijmkl->csrA); 75 if (stat != SPARSE_STATUS_SUCCESS) { 76 PetscFunctionReturn(PETSC_ERR_LIB); 77 } 78 } 79 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 80 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 81 82 /* Change the type of B to MATSEQAIJ. 
*/ 83 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 84 85 *newmat = B; 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 90 { 91 PetscErrorCode ierr; 92 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 93 94 PetscFunctionBegin; 95 96 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 97 * spptr pointer. */ 98 if (aijmkl) { 99 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 100 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 101 if (aijmkl->sparse_optimized) { 102 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 103 stat = mkl_sparse_destroy(aijmkl->csrA); 104 if (stat != SPARSE_STATUS_SUCCESS) { 105 PetscFunctionReturn(PETSC_ERR_LIB); 106 } 107 } 108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 109 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 110 } 111 112 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 113 * to destroy everything that remains. */ 114 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 115 /* Note that I don't call MatSetType(). I believe this is because that 116 * is only to be called when *building* a matrix. I could be wrong, but 117 * that is how things work for the SuperLU matrix class. */ 118 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 123 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 124 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 125 * handle, creates a new one, and then calls mkl_sparse_optimize(). 
126 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 127 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 128 * an unoptimized matrix handle here. */ 129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 130 { 131 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 132 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 133 * does nothing. We make it callable anyway in this case because it cuts 134 * down on littering the code with #ifdefs. */ 135 PetscFunctionBegin; 136 PetscFunctionReturn(0); 137 #else 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 139 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 140 PetscInt m,n; 141 MatScalar *aa; 142 PetscInt *aj,*ai; 143 sparse_status_t stat; 144 145 PetscFunctionBegin; 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 148 if (aijmkl->sparse_optimized) { 149 /* Matrix has been previously assembled and optimized. Must destroy old 150 * matrix handle before running the optimization step again. */ 151 stat = mkl_sparse_destroy(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) { 153 PetscFunctionReturn(PETSC_ERR_LIB); 154 } 155 } 156 aijmkl->sparse_optimized = PETSC_FALSE; 157 158 /* Now perform the SpMV2 setup and matrix optimization. */ 159 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 162 m = A->rmap->n; 163 n = A->cmap->n; 164 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165 aa = a->a; /* Nonzero elements stored row-by-row. */ 166 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 167 if ((a->nz!=0) & !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. 
*/ 170 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173 stat = mkl_sparse_optimize(aijmkl->csrA); 174 if (stat != SPARSE_STATUS_SUCCESS) { 175 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 176 PetscFunctionReturn(PETSC_ERR_LIB); 177 } 178 aijmkl->sparse_optimized = PETSC_TRUE; 179 } 180 181 PetscFunctionReturn(0); 182 #endif 183 } 184 185 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 186 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 187 * matrix handle. 188 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 189 * there is no good alternative. */ 190 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 191 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 192 { 193 PetscErrorCode ierr; 194 sparse_status_t stat; 195 sparse_index_base_t indexing; 196 PetscInt nrows, ncols; 197 PetscInt *aj,*ai,*dummy; 198 MatScalar *aa; 199 Mat A; 200 Mat_SeqAIJ *a; 201 Mat_SeqAIJMKL *aijmkl; 202 203 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. 
*/ 204 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 205 if (stat != SPARSE_STATUS_SUCCESS) { 206 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 207 PetscFunctionReturn(PETSC_ERR_LIB); 208 } 209 210 if (reuse == MAT_REUSE_MATRIX) { 211 ierr = MatDestroy(mat);CHKERRQ(ierr); 212 } 213 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 214 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 215 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 216 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 217 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 218 * they will be destroyed when the MKL handle is destroyed. 219 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 220 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 221 222 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 223 * Now turn it into a MATSEQAIJMKL. */ 224 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 225 226 a = (Mat_SeqAIJ*)A->data; 227 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 228 aijmkl->csrA = csrA; 229 230 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 231 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 232 * and just need to be able to run the MKL optimization step. 
*/ 233 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 234 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 235 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 236 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 237 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 238 stat = mkl_sparse_optimize(aijmkl->csrA); 239 if (stat != SPARSE_STATUS_SUCCESS) { 240 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 241 PetscFunctionReturn(PETSC_ERR_LIB); 242 } 243 aijmkl->sparse_optimized = PETSC_TRUE; 244 245 *mat = A; 246 PetscFunctionReturn(0); 247 } 248 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 249 250 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 251 { 252 PetscErrorCode ierr; 253 Mat_SeqAIJMKL *aijmkl; 254 Mat_SeqAIJMKL *aijmkl_dest; 255 256 PetscFunctionBegin; 257 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 258 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 259 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 260 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 261 aijmkl_dest->sparse_optimized = PETSC_FALSE; 262 if (aijmkl->eager_inspection) { 263 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 264 } 265 PetscFunctionReturn(0); 266 } 267 268 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 269 { 270 PetscErrorCode ierr; 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 272 Mat_SeqAIJMKL *aijmkl; 273 274 PetscFunctionBegin; 275 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 276 277 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 278 * extra information and some different methods, call the AssemblyEnd 279 * routine for a MATSEQAIJ. 280 * I'm not sure if this is the best way to do this, but it avoids 281 * a lot of code duplication. */ 282 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. 
*/ 283 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 284 285 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 286 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 287 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 288 if (aijmkl->eager_inspection) { 289 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 290 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 291 } else if (aijmkl->sparse_optimized) { 292 /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 293 * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 294 * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 295 * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 296 sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 297 if (stat != SPARSE_STATUS_SUCCESS) { 298 PetscFunctionReturn(PETSC_ERR_LIB); 299 } 300 aijmkl->sparse_optimized = PETSC_FALSE; 301 #endif 302 } 303 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 308 { 309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 310 const PetscScalar *x; 311 PetscScalar *y; 312 const MatScalar *aa; 313 PetscErrorCode ierr; 314 PetscInt m=A->rmap->n; 315 PetscInt n=A->cmap->n; 316 PetscScalar alpha = 1.0; 317 PetscScalar beta = 0.0; 318 const PetscInt *aj,*ai; 319 char matdescra[6]; 320 321 322 /* Variables not in MatMult_SeqAIJ. */ 323 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 324 325 PetscFunctionBegin; 326 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 327 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. 
*/ 328 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 329 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 330 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 331 aa = a->a; /* Nonzero elements stored row-by-row. */ 332 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 333 334 /* Call MKL sparse BLAS routine to do the MatMult. */ 335 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 336 337 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 338 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 344 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 345 { 346 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 347 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 348 const PetscScalar *x; 349 PetscScalar *y; 350 PetscErrorCode ierr; 351 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 352 353 PetscFunctionBegin; 354 355 /* If there are no nonzero entries, zero yy and return immediately. */ 356 if(!a->nz) { 357 PetscInt i; 358 PetscInt m=A->rmap->n; 359 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 360 for (i=0; i<m; i++) { 361 y[i] = 0.0; 362 } 363 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 367 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 368 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369 370 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 371 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 372 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 373 if (!aijmkl->sparse_optimized) { 374 MatSeqAIJMKL_create_mkl_handle(A); 375 } 376 377 /* Call MKL SpMV2 executor routine to do the MatMult. 
*/ 378 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 379 380 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 381 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 382 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 383 if (stat != SPARSE_STATUS_SUCCESS) { 384 PetscFunctionReturn(PETSC_ERR_LIB); 385 } 386 PetscFunctionReturn(0); 387 } 388 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 389 390 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 391 { 392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 393 const PetscScalar *x; 394 PetscScalar *y; 395 const MatScalar *aa; 396 PetscErrorCode ierr; 397 PetscInt m=A->rmap->n; 398 PetscInt n=A->cmap->n; 399 PetscScalar alpha = 1.0; 400 PetscScalar beta = 0.0; 401 const PetscInt *aj,*ai; 402 char matdescra[6]; 403 404 /* Variables not in MatMultTranspose_SeqAIJ. */ 405 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 406 407 PetscFunctionBegin; 408 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 409 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 410 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 412 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 413 aa = a->a; /* Nonzero elements stored row-by-row. */ 414 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 415 416 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 417 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 418 419 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 426 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 427 { 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 429 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 430 const PetscScalar *x; 431 PetscScalar *y; 432 PetscErrorCode ierr; 433 sparse_status_t stat; 434 435 PetscFunctionBegin; 436 437 /* If there are no nonzero entries, zero yy and return immediately. */ 438 if(!a->nz) { 439 PetscInt i; 440 PetscInt n=A->cmap->n; 441 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442 for (i=0; i<n; i++) { 443 y[i] = 0.0; 444 } 445 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 451 452 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 453 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 454 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 455 if (!aijmkl->sparse_optimized) { 456 MatSeqAIJMKL_create_mkl_handle(A); 457 } 458 459 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. 
*/ 460 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 461 462 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 464 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 465 if (stat != SPARSE_STATUS_SUCCESS) { 466 PetscFunctionReturn(PETSC_ERR_LIB); 467 } 468 PetscFunctionReturn(0); 469 } 470 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 471 472 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 475 const PetscScalar *x; 476 PetscScalar *y,*z; 477 const MatScalar *aa; 478 PetscErrorCode ierr; 479 PetscInt m=A->rmap->n; 480 PetscInt n=A->cmap->n; 481 const PetscInt *aj,*ai; 482 PetscInt i; 483 484 /* Variables not in MatMultAdd_SeqAIJ. */ 485 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 486 PetscScalar alpha = 1.0; 487 PetscScalar beta; 488 char matdescra[6]; 489 490 PetscFunctionBegin; 491 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 492 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 493 494 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 495 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 496 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 497 aa = a->a; /* Nonzero elements stored row-by-row. */ 498 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 499 500 /* Call MKL sparse BLAS routine to do the MatMult. */ 501 if (zz == yy) { 502 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 503 beta = 1.0; 504 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 505 } else { 506 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 507 * MKL sparse BLAS does not have a MatMultAdd equivalent. 
*/ 508 beta = 0.0; 509 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 510 for (i=0; i<m; i++) { 511 z[i] += y[i]; 512 } 513 } 514 515 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 516 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 517 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 522 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 523 { 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 525 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 526 const PetscScalar *x; 527 PetscScalar *y,*z; 528 PetscErrorCode ierr; 529 PetscInt m=A->rmap->n; 530 PetscInt i; 531 532 /* Variables not in MatMultAdd_SeqAIJ. */ 533 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 534 535 PetscFunctionBegin; 536 537 /* If there are no nonzero entries, set zz = yy and return immediately. */ 538 if(!a->nz) { 539 PetscInt i; 540 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 541 for (i=0; i<m; i++) { 542 z[i] = y[i]; 543 } 544 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 549 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 550 551 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 552 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 553 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 554 if (!aijmkl->sparse_optimized) { 555 MatSeqAIJMKL_create_mkl_handle(A); 556 } 557 558 /* Call MKL sparse BLAS routine to do the MatMult. */ 559 if (zz == yy) { 560 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 561 * with alpha and beta both set to 1.0. 
*/ 562 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 563 } else { 564 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 565 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 566 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 567 for (i=0; i<m; i++) { 568 z[i] += y[i]; 569 } 570 } 571 572 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 573 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 574 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 575 if (stat != SPARSE_STATUS_SUCCESS) { 576 PetscFunctionReturn(PETSC_ERR_LIB); 577 } 578 PetscFunctionReturn(0); 579 } 580 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 581 582 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 const PetscScalar *x; 586 PetscScalar *y,*z; 587 const MatScalar *aa; 588 PetscErrorCode ierr; 589 PetscInt m=A->rmap->n; 590 PetscInt n=A->cmap->n; 591 const PetscInt *aj,*ai; 592 PetscInt i; 593 594 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 595 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 596 PetscScalar alpha = 1.0; 597 PetscScalar beta; 598 char matdescra[6]; 599 600 PetscFunctionBegin; 601 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 602 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 603 604 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 606 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 607 aa = a->a; /* Nonzero elements stored row-by-row. */ 608 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 609 610 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 611 if (zz == yy) { 612 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 613 beta = 1.0; 614 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 615 } else { 616 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 617 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 618 beta = 0.0; 619 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 620 for (i=0; i<n; i++) { 621 z[i] += y[i]; 622 } 623 } 624 625 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 626 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 627 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 632 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 633 { 634 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 635 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 636 const PetscScalar *x; 637 PetscScalar *y,*z; 638 PetscErrorCode ierr; 639 PetscInt n=A->cmap->n; 640 PetscInt i; 641 642 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 643 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 644 645 PetscFunctionBegin; 646 647 /* If there are no nonzero entries, set zz = yy and return immediately. */ 648 if(!a->nz) { 649 PetscInt i; 650 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 651 for (i=0; i<n; i++) { 652 z[i] = y[i]; 653 } 654 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 659 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 660 661 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 662 * it if needed. 
Eventually, when everything in PETSc is properly updating the matrix state, we should probably 663 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 664 if (!aijmkl->sparse_optimized) { 665 MatSeqAIJMKL_create_mkl_handle(A); 666 } 667 668 /* Call MKL sparse BLAS routine to do the MatMult. */ 669 if (zz == yy) { 670 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 671 * with alpha and beta both set to 1.0. */ 672 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 673 } else { 674 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 675 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 676 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 677 for (i=0; i<n; i++) { 678 z[i] += y[i]; 679 } 680 } 681 682 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 683 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 684 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 if (stat != SPARSE_STATUS_SUCCESS) { 686 PetscFunctionReturn(PETSC_ERR_LIB); 687 } 688 PetscFunctionReturn(0); 689 } 690 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 691 692 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 693 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 694 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 695 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 696 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. 
Even though this means that the (hopefully more 697 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 698 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 699 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 700 { 701 Mat_SeqAIJMKL *a, *b; 702 sparse_matrix_t csrA, csrB, csrC; 703 PetscErrorCode ierr; 704 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 705 706 PetscFunctionBegin; 707 a = (Mat_SeqAIJMKL*)A->spptr; 708 b = (Mat_SeqAIJMKL*)B->spptr; 709 if (!a->sparse_optimized) { 710 MatSeqAIJMKL_create_mkl_handle(A); 711 } 712 if (!b->sparse_optimized) { 713 MatSeqAIJMKL_create_mkl_handle(B); 714 } 715 csrA = a->csrA; 716 csrB = b->csrA; 717 718 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 719 if (stat != SPARSE_STATUS_SUCCESS) { 720 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 721 PetscFunctionReturn(PETSC_ERR_LIB); 722 } 723 724 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 725 726 PetscFunctionReturn(0); 727 } 728 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 729 730 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 731 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 732 { 733 Mat_SeqAIJMKL *a, *b; 734 sparse_matrix_t csrA, csrB, csrC; 735 PetscErrorCode ierr; 736 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 737 738 PetscFunctionBegin; 739 a = (Mat_SeqAIJMKL*)A->spptr; 740 b = (Mat_SeqAIJMKL*)B->spptr; 741 if (!a->sparse_optimized) { 742 MatSeqAIJMKL_create_mkl_handle(A); 743 } 744 if (!b->sparse_optimized) { 745 MatSeqAIJMKL_create_mkl_handle(B); 746 } 747 csrA = a->csrA; 748 csrB = b->csrA; 749 750 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 751 if (stat != SPARSE_STATUS_SUCCESS) { 752 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 753 PetscFunctionReturn(PETSC_ERR_LIB); 754 } 755 756 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 757 758 PetscFunctionReturn(0); 759 } 760 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 761 762 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 763 { 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 768 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 773 { 774 PetscErrorCode ierr; 775 776 PetscFunctionBegin; 777 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 778 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 779 PetscFunctionReturn(0); 780 } 781 782 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 783 { 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 788 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 793 { 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 798 if (str == SAME_NONZERO_PATTERN) { 799 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 800 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 801 } 802 PetscFunctionReturn(0); 803 } 804 805 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 806 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 807 * routine, but can also be used to convert an assembled SeqAIJ matrix 808 * into a SeqAIJMKL one. 
*/ 809 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 810 { 811 PetscErrorCode ierr; 812 Mat B = *newmat; 813 Mat_SeqAIJMKL *aijmkl; 814 PetscBool set; 815 PetscBool sametype; 816 817 PetscFunctionBegin; 818 if (reuse == MAT_INITIAL_MATRIX) { 819 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 820 } 821 822 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 823 if (sametype) PetscFunctionReturn(0); 824 825 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 826 B->spptr = (void*) aijmkl; 827 828 /* Set function pointers for methods that we inherit from AIJ but override. 829 * We also parse some command line options below, since those determine some of the methods we point to. */ 830 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 831 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 832 B->ops->destroy = MatDestroy_SeqAIJMKL; 833 834 aijmkl->sparse_optimized = PETSC_FALSE; 835 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 836 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 837 #else 838 aijmkl->no_SpMV2 = PETSC_TRUE; 839 #endif 840 aijmkl->eager_inspection = PETSC_FALSE; 841 842 /* Parse command line options. 
*/ 843 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 844 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 845 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 846 ierr = PetscOptionsEnd();CHKERRQ(ierr); 847 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 848 if(!aijmkl->no_SpMV2) { 849 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 850 aijmkl->no_SpMV2 = PETSC_TRUE; 851 } 852 #endif 853 854 if(!aijmkl->no_SpMV2) { 855 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 856 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 857 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 858 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 859 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 860 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 861 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 862 #endif 863 } else { 864 B->ops->mult = MatMult_SeqAIJMKL; 865 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 866 B->ops->multadd = MatMultAdd_SeqAIJMKL; 867 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 868 } 869 870 B->ops->scale = MatScale_SeqAIJMKL; 871 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 872 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 873 B->ops->axpy = MatAXPY_SeqAIJMKL; 874 875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 877 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 878 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 880 if(!aijmkl->no_SpMV2) { 881 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 882 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 884 #endif 885 } 886 887 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 888 *newmat = B; 889 PetscFunctionReturn(0); 890 } 891 892 /*@C 893 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 894 This type inherits from AIJ and is largely identical, but uses sparse BLAS 895 routines from Intel MKL whenever possible. 896 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 897 operations are currently supported. 898 If the installed version of MKL supports the "SpMV2" sparse 899 inspector-executor routines, then those are used by default. 900 901 Collective on MPI_Comm 902 903 Input Parameters: 904 + comm - MPI communicator, set to PETSC_COMM_SELF 905 . m - number of rows 906 . n - number of columns 907 . nz - number of nonzeros per row (same for all rows) 908 - nnz - array containing the number of nonzeros in the various rows 909 (possibly different for each row) or NULL 910 911 Output Parameter: 912 . 
  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create an empty Mat, size it as a sequential m-by-n matrix, and give it the
   * AIJMKL type (which triggers MatCreate_SeqAIJMKL() / MatConvert_SeqAIJ_SeqAIJMKL()). */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  /* Preallocate via the SeqAIJ implementation routine, since AIJMKL stores its
   * data in the same CSR arrays as the base SeqAIJ class. */
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Type-registration constructor for MATSEQAIJMKL: build the matrix as a plain
 * SeqAIJ first, then convert it in place to the MKL-accelerated subclass. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}