1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 52 B->ops->scale = MatScale_SeqAIJ; 53 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 54 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 55 B->ops->axpy = MatAXPY_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 if(!aijmkl->no_SpMV2) { 62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #endif 66 } 67 68 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69 * simply involves destroying the MKL sparse matrix handle and then freeing 70 * the spptr pointer. */ 71 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 72 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 73 74 if (aijmkl->sparse_optimized) { 75 sparse_status_t stat; 76 stat = mkl_sparse_destroy(aijmkl->csrA); 77 if (stat != SPARSE_STATUS_SUCCESS) { 78 PetscFunctionReturn(PETSC_ERR_LIB); 79 } 80 } 81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 83 84 /* Change the type of B to MATSEQAIJ. */ 85 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 86 87 *newmat = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 92 { 93 PetscErrorCode ierr; 94 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 95 96 PetscFunctionBegin; 97 98 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99 * spptr pointer. */ 100 if (aijmkl) { 101 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 102 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 103 if (aijmkl->sparse_optimized) { 104 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 105 stat = mkl_sparse_destroy(aijmkl->csrA); 106 if (stat != SPARSE_STATUS_SUCCESS) { 107 PetscFunctionReturn(PETSC_ERR_LIB); 108 } 109 } 110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 111 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 112 } 113 114 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 115 * to destroy everything that remains. */ 116 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 117 /* Note that I don't call MatSetType(). I believe this is because that 118 * is only to be called when *building* a matrix. I could be wrong, but 119 * that is how things work for the SuperLU matrix class. */ 120 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 125 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 126 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 127 * handle, creates a new one, and then calls mkl_sparse_optimize(). 128 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 129 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 130 * an unoptimized matrix handle here. */ 131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 132 { 133 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 134 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 135 * does nothing. We make it callable anyway in this case because it cuts 136 * down on littering the code with #ifdefs. */ 137 PetscFunctionBegin; 138 PetscFunctionReturn(0); 139 #else 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 141 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 142 PetscInt m,n; 143 MatScalar *aa; 144 PetscInt *aj,*ai; 145 sparse_status_t stat; 146 147 PetscFunctionBegin; 148 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) { 155 PetscFunctionReturn(PETSC_ERR_LIB); 156 } 157 } 158 aijmkl->sparse_optimized = PETSC_FALSE; 159 160 /* Now perform the SpMV2 setup and matrix optimization. */ 161 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 164 m = A->rmap->n; 165 n = A->cmap->n; 166 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 167 aa = a->a; /* Nonzero elements stored row-by-row. */ 168 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 169 if ((a->nz!=0) & !(A->structure_only)) { 170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 172 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 173 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 stat = mkl_sparse_optimize(aijmkl->csrA); 176 if (stat != SPARSE_STATUS_SUCCESS) { 177 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 178 PetscFunctionReturn(PETSC_ERR_LIB); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 } 182 183 PetscFunctionReturn(0); 184 #endif 185 } 186 187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 188 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 189 * matrix handle. 190 * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity 191 * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult() 192 * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case. 193 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 194 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 195 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 196 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 197 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 199 { 200 PetscErrorCode ierr; 201 sparse_status_t stat; 202 sparse_index_base_t indexing; 203 PetscInt nrows, ncols; 204 PetscInt *aj,*ai,*dummy; 205 MatScalar *aa; 206 Mat A; 207 Mat_SeqAIJ *a; 208 Mat_SeqAIJMKL *aijmkl; 209 210 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 211 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 212 if (stat != SPARSE_STATUS_SUCCESS) { 213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 214 PetscFunctionReturn(PETSC_ERR_LIB); 215 } 216 217 if (reuse == MAT_INITIAL_MATRIX) { 218 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 219 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 220 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 221 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 222 223 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 224 * Now turn it into a MATSEQAIJMKL. */ 225 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 226 } else { 227 A = *mat; 228 } 229 230 a = (Mat_SeqAIJ*)A->data; 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 233 if (reuse == MAT_REUSE_MATRIX) { 234 /* Need to destroy old MKL handle. */ 235 stat = mkl_sparse_destroy(aijmkl->csrA); 236 if (stat != SPARSE_STATUS_SUCCESS) { 237 PetscFunctionReturn(PETSC_ERR_LIB); 238 } 239 240 /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */ 241 ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr); 242 } 243 aijmkl->csrA = csrA; 244 245 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 246 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 247 * and just need to be able to run the MKL optimization step. */ 248 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 249 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 250 stat = mkl_sparse_optimize(aijmkl->csrA); 251 if (stat != SPARSE_STATUS_SUCCESS) { 252 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 253 PetscFunctionReturn(PETSC_ERR_LIB); 254 } 255 aijmkl->sparse_optimized = PETSC_TRUE; 256 257 *mat = A; 258 PetscFunctionReturn(0); 259 } 260 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 261 262 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 263 { 264 PetscErrorCode ierr; 265 Mat_SeqAIJMKL *aijmkl; 266 Mat_SeqAIJMKL *aijmkl_dest; 267 268 PetscFunctionBegin; 269 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 270 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 271 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 272 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 273 aijmkl_dest->sparse_optimized = PETSC_FALSE; 274 if (aijmkl->eager_inspection) { 275 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 276 } 277 PetscFunctionReturn(0); 278 } 279 280 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 281 { 282 PetscErrorCode ierr; 283 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 284 Mat_SeqAIJMKL *aijmkl; 285 286 PetscFunctionBegin; 287 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 288 289 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 290 * extra information and some different methods, call the AssemblyEnd 291 * routine for a MATSEQAIJ. 292 * I'm not sure if this is the best way to do this, but it avoids 293 * a lot of code duplication. */ 294 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 295 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 296 297 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 298 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 299 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 300 if (aijmkl->eager_inspection) { 301 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 302 } 303 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 308 { 309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 310 const PetscScalar *x; 311 PetscScalar *y; 312 const MatScalar *aa; 313 PetscErrorCode ierr; 314 PetscInt m=A->rmap->n; 315 PetscInt n=A->cmap->n; 316 PetscScalar alpha = 1.0; 317 PetscScalar beta = 0.0; 318 const PetscInt *aj,*ai; 319 char matdescra[6]; 320 321 322 /* Variables not in MatMult_SeqAIJ. */ 323 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 324 325 PetscFunctionBegin; 326 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 327 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 328 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 329 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 330 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 331 aa = a->a; /* Nonzero elements stored row-by-row. */ 332 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 333 334 /* Call MKL sparse BLAS routine to do the MatMult. */ 335 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 336 337 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 338 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 344 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 345 { 346 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 347 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 348 const PetscScalar *x; 349 PetscScalar *y; 350 PetscErrorCode ierr; 351 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 352 353 PetscFunctionBegin; 354 355 /* If there are no nonzero entries, zero yy and return immediately. */ 356 if(!a->nz) { 357 PetscInt i; 358 PetscInt m=A->rmap->n; 359 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 360 for (i=0; i<m; i++) { 361 y[i] = 0.0; 362 } 363 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 367 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 368 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369 370 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 371 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 372 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 373 if (!aijmkl->sparse_optimized) { 374 MatSeqAIJMKL_create_mkl_handle(A); 375 } 376 377 /* Call MKL SpMV2 executor routine to do the MatMult. */ 378 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 379 380 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 381 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 382 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 383 if (stat != SPARSE_STATUS_SUCCESS) { 384 PetscFunctionReturn(PETSC_ERR_LIB); 385 } 386 PetscFunctionReturn(0); 387 } 388 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 389 390 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 391 { 392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 393 const PetscScalar *x; 394 PetscScalar *y; 395 const MatScalar *aa; 396 PetscErrorCode ierr; 397 PetscInt m=A->rmap->n; 398 PetscInt n=A->cmap->n; 399 PetscScalar alpha = 1.0; 400 PetscScalar beta = 0.0; 401 const PetscInt *aj,*ai; 402 char matdescra[6]; 403 404 /* Variables not in MatMultTranspose_SeqAIJ. */ 405 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 406 407 PetscFunctionBegin; 408 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 409 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 410 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 412 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 413 aa = a->a; /* Nonzero elements stored row-by-row. */ 414 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 415 416 /* Call MKL sparse BLAS routine to do the MatMult. */ 417 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 418 419 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 426 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 427 { 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 429 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 430 const PetscScalar *x; 431 PetscScalar *y; 432 PetscErrorCode ierr; 433 sparse_status_t stat; 434 435 PetscFunctionBegin; 436 437 /* If there are no nonzero entries, zero yy and return immediately. */ 438 if(!a->nz) { 439 PetscInt i; 440 PetscInt n=A->cmap->n; 441 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442 for (i=0; i<n; i++) { 443 y[i] = 0.0; 444 } 445 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 451 452 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 453 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 454 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 455 if (!aijmkl->sparse_optimized) { 456 MatSeqAIJMKL_create_mkl_handle(A); 457 } 458 459 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 460 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 461 462 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 464 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 465 if (stat != SPARSE_STATUS_SUCCESS) { 466 PetscFunctionReturn(PETSC_ERR_LIB); 467 } 468 PetscFunctionReturn(0); 469 } 470 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 471 472 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 475 const PetscScalar *x; 476 PetscScalar *y,*z; 477 const MatScalar *aa; 478 PetscErrorCode ierr; 479 PetscInt m=A->rmap->n; 480 PetscInt n=A->cmap->n; 481 const PetscInt *aj,*ai; 482 PetscInt i; 483 484 /* Variables not in MatMultAdd_SeqAIJ. */ 485 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 486 PetscScalar alpha = 1.0; 487 PetscScalar beta; 488 char matdescra[6]; 489 490 PetscFunctionBegin; 491 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 492 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 493 494 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 495 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 496 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 497 aa = a->a; /* Nonzero elements stored row-by-row. */ 498 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 499 500 /* Call MKL sparse BLAS routine to do the MatMult. */ 501 if (zz == yy) { 502 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 503 beta = 1.0; 504 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 505 } else { 506 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 507 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 508 beta = 0.0; 509 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 510 for (i=0; i<m; i++) { 511 z[i] += y[i]; 512 } 513 } 514 515 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 516 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 517 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 522 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 523 { 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 525 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 526 const PetscScalar *x; 527 PetscScalar *y,*z; 528 PetscErrorCode ierr; 529 PetscInt m=A->rmap->n; 530 PetscInt i; 531 532 /* Variables not in MatMultAdd_SeqAIJ. */ 533 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 534 535 PetscFunctionBegin; 536 537 /* If there are no nonzero entries, set zz = yy and return immediately. */ 538 if(!a->nz) { 539 PetscInt i; 540 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 541 for (i=0; i<m; i++) { 542 z[i] = y[i]; 543 } 544 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 549 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 550 551 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 552 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 553 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 554 if (!aijmkl->sparse_optimized) { 555 MatSeqAIJMKL_create_mkl_handle(A); 556 } 557 558 /* Call MKL sparse BLAS routine to do the MatMult. */ 559 if (zz == yy) { 560 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 561 * with alpha and beta both set to 1.0. */ 562 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 563 } else { 564 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 565 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 566 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 567 for (i=0; i<m; i++) { 568 z[i] += y[i]; 569 } 570 } 571 572 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 573 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 574 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 575 if (stat != SPARSE_STATUS_SUCCESS) { 576 PetscFunctionReturn(PETSC_ERR_LIB); 577 } 578 PetscFunctionReturn(0); 579 } 580 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 581 582 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 const PetscScalar *x; 586 PetscScalar *y,*z; 587 const MatScalar *aa; 588 PetscErrorCode ierr; 589 PetscInt m=A->rmap->n; 590 PetscInt n=A->cmap->n; 591 const PetscInt *aj,*ai; 592 PetscInt i; 593 594 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 595 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 596 PetscScalar alpha = 1.0; 597 PetscScalar beta; 598 char matdescra[6]; 599 600 PetscFunctionBegin; 601 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 602 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 603 604 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 606 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 607 aa = a->a; /* Nonzero elements stored row-by-row. */ 608 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 609 610 /* Call MKL sparse BLAS routine to do the MatMult. */ 611 if (zz == yy) { 612 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 613 beta = 1.0; 614 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 615 } else { 616 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 617 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 618 beta = 0.0; 619 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 620 for (i=0; i<n; i++) { 621 z[i] += y[i]; 622 } 623 } 624 625 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 626 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 627 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 632 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 633 { 634 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 635 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 636 const PetscScalar *x; 637 PetscScalar *y,*z; 638 PetscErrorCode ierr; 639 PetscInt n=A->cmap->n; 640 PetscInt i; 641 642 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 643 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 644 645 PetscFunctionBegin; 646 647 /* If there are no nonzero entries, set zz = yy and return immediately. */ 648 if(!a->nz) { 649 PetscInt i; 650 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 651 for (i=0; i<n; i++) { 652 z[i] = y[i]; 653 } 654 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 659 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 660 661 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 662 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 663 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 664 if (!aijmkl->sparse_optimized) { 665 MatSeqAIJMKL_create_mkl_handle(A); 666 } 667 668 /* Call MKL sparse BLAS routine to do the MatMult. */ 669 if (zz == yy) { 670 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 671 * with alpha and beta both set to 1.0. */ 672 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 673 } else { 674 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 675 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 676 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 677 for (i=0; i<n; i++) { 678 z[i] += y[i]; 679 } 680 } 681 682 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 683 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 684 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 if (stat != SPARSE_STATUS_SUCCESS) { 686 PetscFunctionReturn(PETSC_ERR_LIB); 687 } 688 PetscFunctionReturn(0); 689 } 690 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 691 692 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 693 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 694 { 695 Mat_SeqAIJMKL *a, *b; 696 sparse_matrix_t csrA, csrB, csrC; 697 PetscErrorCode ierr; 698 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 699 700 PetscFunctionBegin; 701 a = (Mat_SeqAIJMKL*)A->spptr; 702 b = (Mat_SeqAIJMKL*)B->spptr; 703 if (!a->sparse_optimized) { 704 MatSeqAIJMKL_create_mkl_handle(A); 705 } 706 if (!b->sparse_optimized) { 707 MatSeqAIJMKL_create_mkl_handle(B); 708 } 709 csrA = a->csrA; 710 csrB = b->csrA; 711 712 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 713 if (stat != SPARSE_STATUS_SUCCESS) { 714 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 715 PetscFunctionReturn(PETSC_ERR_LIB); 716 } 717 718 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 719 720 PetscFunctionReturn(0); 721 } 722 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 723 724 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 725 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 726 { 727 Mat_SeqAIJMKL *a, *b; 728 sparse_matrix_t csrA, csrB, csrC; 729 PetscErrorCode ierr; 730 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 731 732 PetscFunctionBegin; 733 a = (Mat_SeqAIJMKL*)A->spptr; 734 b = (Mat_SeqAIJMKL*)B->spptr; 735 if (!a->sparse_optimized) { 736 MatSeqAIJMKL_create_mkl_handle(A); 737 } 738 if (!b->sparse_optimized) { 739 MatSeqAIJMKL_create_mkl_handle(B); 740 } 741 csrA = a->csrA; 742 csrB = b->csrA; 743 744 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 745 if (stat != SPARSE_STATUS_SUCCESS) { 746 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 747 PetscFunctionReturn(PETSC_ERR_LIB); 748 } 749 750 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 751 752 PetscFunctionReturn(0); 753 } 754 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 755 756 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 757 { 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 762 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 767 { 768 PetscErrorCode ierr; 769 770 PetscFunctionBegin; 771 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 772 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 777 { 778 PetscErrorCode ierr; 779 780 PetscFunctionBegin; 781 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 782 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 783 PetscFunctionReturn(0); 784 } 785 786 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 787 { 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 792 if (str == SAME_NONZERO_PATTERN) { 793 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 794 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 800 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 801 * routine, but can also be used to convert an assembled SeqAIJ matrix 802 * into a SeqAIJMKL one. */ 803 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 804 { 805 PetscErrorCode ierr; 806 Mat B = *newmat; 807 Mat_SeqAIJMKL *aijmkl; 808 PetscBool set; 809 PetscBool sametype; 810 811 PetscFunctionBegin; 812 if (reuse == MAT_INITIAL_MATRIX) { 813 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 814 } 815 816 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 817 if (sametype) PetscFunctionReturn(0); 818 819 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 820 B->spptr = (void*) aijmkl; 821 822 /* Set function pointers for methods that we inherit from AIJ but override. 823 * We also parse some command line options below, since those determine some of the methods we point to. */ 824 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 825 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 826 B->ops->destroy = MatDestroy_SeqAIJMKL; 827 828 aijmkl->sparse_optimized = PETSC_FALSE; 829 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 830 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 831 #else 832 aijmkl->no_SpMV2 = PETSC_TRUE; 833 #endif 834 aijmkl->eager_inspection = PETSC_FALSE; 835 836 /* Parse command line options. */ 837 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 838 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 839 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 840 ierr = PetscOptionsEnd();CHKERRQ(ierr); 841 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 842 if(!aijmkl->no_SpMV2) { 843 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 844 aijmkl->no_SpMV2 = PETSC_TRUE; 845 } 846 #endif 847 848 if(!aijmkl->no_SpMV2) { 849 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 850 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 851 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 852 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 853 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 854 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 855 B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 856 #endif 857 } else { 858 B->ops->mult = MatMult_SeqAIJMKL; 859 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 860 B->ops->multadd = MatMultAdd_SeqAIJMKL; 861 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 862 } 863 864 B->ops->scale = MatScale_SeqAIJMKL; 865 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 866 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 867 B->ops->axpy = MatAXPY_SeqAIJMKL; 868 869 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 870 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 874 if(!aijmkl->no_SpMV2) { 875 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 877 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 878 #endif 879 } 880 881 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 882 *newmat = B; 883 PetscFunctionReturn(0); 884 } 885 886 /*@C 887 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 888 This type inherits from AIJ and is largely identical, but uses sparse BLAS 889 routines from Intel MKL whenever possible. 890 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 891 operations are currently supported. 892 If the installed version of MKL supports the "SpMV2" sparse 893 inspector-executor routines, then those are used by default. 894 895 Collective on MPI_Comm 896 897 Input Parameters: 898 + comm - MPI communicator, set to PETSC_COMM_SELF 899 . m - number of rows 900 . n - number of columns 901 . nz - number of nonzeros per row (same for all rows) 902 - nnz - array containing the number of nonzeros in the various rows 903 (possibly different for each row) or NULL 904 905 Output Parameter: 906 . A - the matrix 907 908 Options Database Keys: 909 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 910 911 Notes: 912 If nnz is given then nz is ignored 913 914 Level: intermediate 915 916 .keywords: matrix, MKL, sparse, parallel 917 918 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 919 @*/ 920 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 921 { 922 PetscErrorCode ierr; 923 924 PetscFunctionBegin; 925 ierr = MatCreate(comm,A);CHKERRQ(ierr); 926 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 927 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 928 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 933 { 934 PetscErrorCode ierr; 935 936 PetscFunctionBegin; 937 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 938 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941