1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 52 B->ops->scale = MatScale_SeqAIJ; 53 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 54 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 55 B->ops->axpy = MatAXPY_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 if(!aijmkl->no_SpMV2) { 62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #endif 66 } 67 68 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69 * simply involves destroying the MKL sparse matrix handle and then freeing 70 * the spptr pointer. */ 71 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 72 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 73 74 if (aijmkl->sparse_optimized) { 75 sparse_status_t stat; 76 stat = mkl_sparse_destroy(aijmkl->csrA); 77 if (stat != SPARSE_STATUS_SUCCESS) { 78 PetscFunctionReturn(PETSC_ERR_LIB); 79 } 80 } 81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 83 84 /* Change the type of B to MATSEQAIJ. */ 85 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 86 87 *newmat = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 92 { 93 PetscErrorCode ierr; 94 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 95 96 PetscFunctionBegin; 97 98 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99 * spptr pointer. */ 100 if (aijmkl) { 101 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 102 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 103 if (aijmkl->sparse_optimized) { 104 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 105 stat = mkl_sparse_destroy(aijmkl->csrA); 106 if (stat != SPARSE_STATUS_SUCCESS) { 107 PetscFunctionReturn(PETSC_ERR_LIB); 108 } 109 } 110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 111 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 112 } 113 114 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 115 * to destroy everything that remains. */ 116 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 117 /* Note that I don't call MatSetType(). I believe this is because that 118 * is only to be called when *building* a matrix. I could be wrong, but 119 * that is how things work for the SuperLU matrix class. */ 120 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 125 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 126 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 127 * handle, creates a new one, and then calls mkl_sparse_optimize(). 128 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 129 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 130 * an unoptimized matrix handle here. */ 131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 132 { 133 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 134 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 135 * does nothing. We make it callable anyway in this case because it cuts 136 * down on littering the code with #ifdefs. */ 137 PetscFunctionBegin; 138 PetscFunctionReturn(0); 139 #else 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 141 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 142 PetscInt m,n; 143 MatScalar *aa; 144 PetscInt *aj,*ai; 145 sparse_status_t stat; 146 147 PetscFunctionBegin; 148 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) { 155 PetscFunctionReturn(PETSC_ERR_LIB); 156 } 157 } 158 aijmkl->sparse_optimized = PETSC_FALSE; 159 160 /* Now perform the SpMV2 setup and matrix optimization. */ 161 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 164 m = A->rmap->n; 165 n = A->cmap->n; 166 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 167 aa = a->a; /* Nonzero elements stored row-by-row. */ 168 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 169 if ((a->nz!=0) & !(A->structure_only)) { 170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 172 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 173 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 stat = mkl_sparse_optimize(aijmkl->csrA); 176 if (stat != SPARSE_STATUS_SUCCESS) { 177 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 178 PetscFunctionReturn(PETSC_ERR_LIB); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 } 182 183 PetscFunctionReturn(0); 184 #endif 185 } 186 187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 188 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 189 * matrix handle. 190 * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity 191 * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult() 192 * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case. 193 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 194 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 195 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 196 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 197 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 199 { 200 PetscErrorCode ierr; 201 sparse_status_t stat; 202 sparse_index_base_t indexing; 203 PetscInt nrows, ncols; 204 PetscInt *aj,*ai,*dummy; 205 MatScalar *aa; 206 Mat A; 207 Mat_SeqAIJ *a; 208 Mat_SeqAIJMKL *aijmkl; 209 210 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 211 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 212 if (stat != SPARSE_STATUS_SUCCESS) { 213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 214 PetscFunctionReturn(PETSC_ERR_LIB); 215 } 216 217 if (reuse == MAT_INITIAL_MATRIX) { 218 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 219 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 220 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 221 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 222 223 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 224 * Now turn it into a MATSEQAIJMKL. */ 225 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 226 } else { 227 A = *mat; 228 } 229 230 a = (Mat_SeqAIJ*)A->data; 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 233 if (reuse == MAT_REUSE_MATRIX) { 234 /* Need to destroy old MKL handle. */ 235 stat = mkl_sparse_destroy(aijmkl->csrA); 236 if (stat != SPARSE_STATUS_SUCCESS) { 237 PetscFunctionReturn(PETSC_ERR_LIB); 238 } 239 240 /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */ 241 ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr); 242 } 243 aijmkl->csrA = csrA; 244 245 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 246 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 247 * and just need to be able to run the MKL optimization step. */ 248 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 249 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 250 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 251 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 252 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 253 stat = mkl_sparse_optimize(aijmkl->csrA); 254 if (stat != SPARSE_STATUS_SUCCESS) { 255 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 256 PetscFunctionReturn(PETSC_ERR_LIB); 257 } 258 aijmkl->sparse_optimized = PETSC_TRUE; 259 260 *mat = A; 261 PetscFunctionReturn(0); 262 } 263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 264 265 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 266 { 267 PetscErrorCode ierr; 268 Mat_SeqAIJMKL *aijmkl; 269 Mat_SeqAIJMKL *aijmkl_dest; 270 271 PetscFunctionBegin; 272 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 273 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 274 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 275 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 276 aijmkl_dest->sparse_optimized = PETSC_FALSE; 277 if (aijmkl->eager_inspection) { 278 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 279 } 280 PetscFunctionReturn(0); 281 } 282 283 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 284 { 285 PetscErrorCode ierr; 286 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 287 Mat_SeqAIJMKL *aijmkl; 288 289 PetscFunctionBegin; 290 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 291 292 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 293 * extra information and some different methods, call the AssemblyEnd 294 * routine for a MATSEQAIJ. 295 * I'm not sure if this is the best way to do this, but it avoids 296 * a lot of code duplication. */ 297 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 298 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 299 300 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 301 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 302 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 303 if (aijmkl->eager_inspection) { 304 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 305 } else if (aijmkl->sparse_optimized) { 306 /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 307 * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 308 * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 309 * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 310 sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 311 if (stat != SPARSE_STATUS_SUCCESS) { 312 PetscFunctionReturn(PETSC_ERR_LIB); 313 } 314 aijmkl->sparse_optimized = PETSC_FALSE; 315 } 316 317 PetscFunctionReturn(0); 318 } 319 320 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 321 { 322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 323 const PetscScalar *x; 324 PetscScalar *y; 325 const MatScalar *aa; 326 PetscErrorCode ierr; 327 PetscInt m=A->rmap->n; 328 PetscInt n=A->cmap->n; 329 PetscScalar alpha = 1.0; 330 PetscScalar beta = 0.0; 331 const PetscInt *aj,*ai; 332 char matdescra[6]; 333 334 335 /* Variables not in MatMult_SeqAIJ. */ 336 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 337 338 PetscFunctionBegin; 339 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 340 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 341 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 342 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 343 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 344 aa = a->a; /* Nonzero elements stored row-by-row. */ 345 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 346 347 /* Call MKL sparse BLAS routine to do the MatMult. */ 348 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 349 350 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 357 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 358 { 359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 360 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 361 const PetscScalar *x; 362 PetscScalar *y; 363 PetscErrorCode ierr; 364 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 365 366 PetscFunctionBegin; 367 368 /* If there are no nonzero entries, zero yy and return immediately. */ 369 if(!a->nz) { 370 PetscInt i; 371 PetscInt m=A->rmap->n; 372 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 373 for (i=0; i<m; i++) { 374 y[i] = 0.0; 375 } 376 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 381 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 382 383 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 384 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 385 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 386 if (!aijmkl->sparse_optimized) { 387 MatSeqAIJMKL_create_mkl_handle(A); 388 } 389 390 /* Call MKL SpMV2 executor routine to do the MatMult. */ 391 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 392 393 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 394 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 395 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 396 if (stat != SPARSE_STATUS_SUCCESS) { 397 PetscFunctionReturn(PETSC_ERR_LIB); 398 } 399 PetscFunctionReturn(0); 400 } 401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 402 403 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 404 { 405 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 406 const PetscScalar *x; 407 PetscScalar *y; 408 const MatScalar *aa; 409 PetscErrorCode ierr; 410 PetscInt m=A->rmap->n; 411 PetscInt n=A->cmap->n; 412 PetscScalar alpha = 1.0; 413 PetscScalar beta = 0.0; 414 const PetscInt *aj,*ai; 415 char matdescra[6]; 416 417 /* Variables not in MatMultTranspose_SeqAIJ. */ 418 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 419 420 PetscFunctionBegin; 421 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 422 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 423 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 424 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 425 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 426 aa = a->a; /* Nonzero elements stored row-by-row. */ 427 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 428 429 /* Call MKL sparse BLAS routine to do the MatMult. */ 430 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 431 432 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 434 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 440 { 441 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 442 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 443 const PetscScalar *x; 444 PetscScalar *y; 445 PetscErrorCode ierr; 446 sparse_status_t stat; 447 448 PetscFunctionBegin; 449 450 /* If there are no nonzero entries, zero yy and return immediately. */ 451 if(!a->nz) { 452 PetscInt i; 453 PetscInt n=A->cmap->n; 454 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 455 for (i=0; i<n; i++) { 456 y[i] = 0.0; 457 } 458 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 464 465 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 466 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 467 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 468 if (!aijmkl->sparse_optimized) { 469 MatSeqAIJMKL_create_mkl_handle(A); 470 } 471 472 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 473 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 474 475 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 476 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 477 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 478 if (stat != SPARSE_STATUS_SUCCESS) { 479 PetscFunctionReturn(PETSC_ERR_LIB); 480 } 481 PetscFunctionReturn(0); 482 } 483 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 484 485 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 const PetscScalar *x; 489 PetscScalar *y,*z; 490 const MatScalar *aa; 491 PetscErrorCode ierr; 492 PetscInt m=A->rmap->n; 493 PetscInt n=A->cmap->n; 494 const PetscInt *aj,*ai; 495 PetscInt i; 496 497 /* Variables not in MatMultAdd_SeqAIJ. */ 498 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 499 PetscScalar alpha = 1.0; 500 PetscScalar beta; 501 char matdescra[6]; 502 503 PetscFunctionBegin; 504 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 505 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 506 507 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 508 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 509 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 510 aa = a->a; /* Nonzero elements stored row-by-row. */ 511 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 512 513 /* Call MKL sparse BLAS routine to do the MatMult. */ 514 if (zz == yy) { 515 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 516 beta = 1.0; 517 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 518 } else { 519 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 520 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 521 beta = 0.0; 522 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 523 for (i=0; i<m; i++) { 524 z[i] += y[i]; 525 } 526 } 527 528 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 530 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 535 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 536 { 537 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 538 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 539 const PetscScalar *x; 540 PetscScalar *y,*z; 541 PetscErrorCode ierr; 542 PetscInt m=A->rmap->n; 543 PetscInt i; 544 545 /* Variables not in MatMultAdd_SeqAIJ. */ 546 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 547 548 PetscFunctionBegin; 549 550 /* If there are no nonzero entries, set zz = yy and return immediately. */ 551 if(!a->nz) { 552 PetscInt i; 553 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 554 for (i=0; i<m; i++) { 555 z[i] = y[i]; 556 } 557 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 562 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 563 564 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 565 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 566 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 567 if (!aijmkl->sparse_optimized) { 568 MatSeqAIJMKL_create_mkl_handle(A); 569 } 570 571 /* Call MKL sparse BLAS routine to do the MatMult. */ 572 if (zz == yy) { 573 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 574 * with alpha and beta both set to 1.0. */ 575 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 576 } else { 577 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 578 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 579 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 580 for (i=0; i<m; i++) { 581 z[i] += y[i]; 582 } 583 } 584 585 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 586 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 587 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 588 if (stat != SPARSE_STATUS_SUCCESS) { 589 PetscFunctionReturn(PETSC_ERR_LIB); 590 } 591 PetscFunctionReturn(0); 592 } 593 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 594 595 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 598 const PetscScalar *x; 599 PetscScalar *y,*z; 600 const MatScalar *aa; 601 PetscErrorCode ierr; 602 PetscInt m=A->rmap->n; 603 PetscInt n=A->cmap->n; 604 const PetscInt *aj,*ai; 605 PetscInt i; 606 607 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 608 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 609 PetscScalar alpha = 1.0; 610 PetscScalar beta; 611 char matdescra[6]; 612 613 PetscFunctionBegin; 614 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 615 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 616 617 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 618 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 619 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 620 aa = a->a; /* Nonzero elements stored row-by-row. */ 621 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 622 623 /* Call MKL sparse BLAS routine to do the MatMult. */ 624 if (zz == yy) { 625 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 626 beta = 1.0; 627 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 628 } else { 629 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 630 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 631 beta = 0.0; 632 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 633 for (i=0; i<n; i++) { 634 z[i] += y[i]; 635 } 636 } 637 638 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 639 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 640 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 645 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 649 const PetscScalar *x; 650 PetscScalar *y,*z; 651 PetscErrorCode ierr; 652 PetscInt n=A->cmap->n; 653 PetscInt i; 654 655 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 656 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 657 658 PetscFunctionBegin; 659 660 /* If there are no nonzero entries, set zz = yy and return immediately. */ 661 if(!a->nz) { 662 PetscInt i; 663 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 664 for (i=0; i<n; i++) { 665 z[i] = y[i]; 666 } 667 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 672 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 673 674 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 675 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 676 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 677 if (!aijmkl->sparse_optimized) { 678 MatSeqAIJMKL_create_mkl_handle(A); 679 } 680 681 /* Call MKL sparse BLAS routine to do the MatMult. */ 682 if (zz == yy) { 683 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 684 * with alpha and beta both set to 1.0. */ 685 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 686 } else { 687 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 688 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 689 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 690 for (i=0; i<n; i++) { 691 z[i] += y[i]; 692 } 693 } 694 695 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 696 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 697 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 698 if (stat != SPARSE_STATUS_SUCCESS) { 699 PetscFunctionReturn(PETSC_ERR_LIB); 700 } 701 PetscFunctionReturn(0); 702 } 703 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 704 705 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 706 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 707 { 708 Mat_SeqAIJMKL *a, *b; 709 sparse_matrix_t csrA, csrB, csrC; 710 PetscErrorCode ierr; 711 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 712 713 PetscFunctionBegin; 714 a = (Mat_SeqAIJMKL*)A->spptr; 715 b = (Mat_SeqAIJMKL*)B->spptr; 716 if (!a->sparse_optimized) { 717 MatSeqAIJMKL_create_mkl_handle(A); 718 } 719 if (!b->sparse_optimized) { 720 MatSeqAIJMKL_create_mkl_handle(B); 721 } 722 csrA = a->csrA; 723 csrB = b->csrA; 724 725 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 726 if (stat != SPARSE_STATUS_SUCCESS) { 727 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 728 PetscFunctionReturn(PETSC_ERR_LIB); 729 } 730 731 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 732 733 PetscFunctionReturn(0); 734 } 735 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 736 737 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 738 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 739 { 740 Mat_SeqAIJMKL *a, *b; 741 sparse_matrix_t csrA, csrB, csrC; 742 PetscErrorCode ierr; 743 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 744 745 PetscFunctionBegin; 746 a = (Mat_SeqAIJMKL*)A->spptr; 747 b = (Mat_SeqAIJMKL*)B->spptr; 748 if (!a->sparse_optimized) { 749 MatSeqAIJMKL_create_mkl_handle(A); 750 } 751 if (!b->sparse_optimized) { 752 MatSeqAIJMKL_create_mkl_handle(B); 753 } 754 csrA = a->csrA; 755 csrB = b->csrA; 756 757 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 758 if (stat != SPARSE_STATUS_SUCCESS) { 759 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 760 PetscFunctionReturn(PETSC_ERR_LIB); 761 } 762 763 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 764 765 PetscFunctionReturn(0); 766 } 767 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 768 769 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 770 { 771 PetscErrorCode ierr; 772 773 PetscFunctionBegin; 774 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 775 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 780 { 781 PetscErrorCode ierr; 782 783 PetscFunctionBegin; 784 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 785 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 790 { 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 795 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 796 PetscFunctionReturn(0); 797 } 798 799 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 800 { 801 PetscErrorCode ierr; 802 803 PetscFunctionBegin; 804 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 805 if (str == SAME_NONZERO_PATTERN) { 806 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 807 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 808 } 809 PetscFunctionReturn(0); 810 } 811 812 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 813 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 814 * routine, but can also be used to convert an assembled SeqAIJ matrix 815 * into a SeqAIJMKL one. */ 816 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 817 { 818 PetscErrorCode ierr; 819 Mat B = *newmat; 820 Mat_SeqAIJMKL *aijmkl; 821 PetscBool set; 822 PetscBool sametype; 823 824 PetscFunctionBegin; 825 if (reuse == MAT_INITIAL_MATRIX) { 826 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 827 } 828 829 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 830 if (sametype) PetscFunctionReturn(0); 831 832 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 833 B->spptr = (void*) aijmkl; 834 835 /* Set function pointers for methods that we inherit from AIJ but override. 836 * We also parse some command line options below, since those determine some of the methods we point to. */ 837 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 838 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 839 B->ops->destroy = MatDestroy_SeqAIJMKL; 840 841 aijmkl->sparse_optimized = PETSC_FALSE; 842 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 843 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 844 #else 845 aijmkl->no_SpMV2 = PETSC_TRUE; 846 #endif 847 aijmkl->eager_inspection = PETSC_FALSE; 848 849 /* Parse command line options. */ 850 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 851 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 852 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 853 ierr = PetscOptionsEnd();CHKERRQ(ierr); 854 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 855 if(!aijmkl->no_SpMV2) { 856 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 857 aijmkl->no_SpMV2 = PETSC_TRUE; 858 } 859 #endif 860 861 if(!aijmkl->no_SpMV2) { 862 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 863 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 864 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 865 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 866 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 867 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 868 B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 869 #endif 870 } else { 871 B->ops->mult = MatMult_SeqAIJMKL; 872 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 873 B->ops->multadd = MatMultAdd_SeqAIJMKL; 874 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 875 } 876 877 B->ops->scale = MatScale_SeqAIJMKL; 878 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 879 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 880 B->ops->axpy = MatAXPY_SeqAIJMKL; 881 882 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 887 if(!aijmkl->no_SpMV2) { 888 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 889 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 890 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 891 #endif 892 } 893 894 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 895 *newmat = B; 896 PetscFunctionReturn(0); 897 } 898 899 /*@C 900 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 901 This type inherits from AIJ and is largely identical, but uses sparse BLAS 902 routines from Intel MKL whenever possible. 903 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 904 operations are currently supported. 905 If the installed version of MKL supports the "SpMV2" sparse 906 inspector-executor routines, then those are used by default. 907 908 Collective on MPI_Comm 909 910 Input Parameters: 911 + comm - MPI communicator, set to PETSC_COMM_SELF 912 . m - number of rows 913 . n - number of columns 914 . nz - number of nonzeros per row (same for all rows) 915 - nnz - array containing the number of nonzeros in the various rows 916 (possibly different for each row) or NULL 917 918 Output Parameter: 919 . A - the matrix 920 921 Options Database Keys: 922 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 923 924 Notes: 925 If nnz is given then nz is ignored 926 927 Level: intermediate 928 929 .keywords: matrix, MKL, sparse, parallel 930 931 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 932 @*/ 933 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 934 { 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 ierr = MatCreate(comm,A);CHKERRQ(ierr); 939 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 940 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 941 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 946 { 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 951 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954